library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(naniar)
library(tidyr)
# Absolute location of the raw match export on the author's machine.
file_path <- "/Users/eylulruyagullu/Desktop/match_data"
# Parse the delimited file into a tibble, letting readr guess the column types.
match_data <- read_csv(file = file_path)
## Rows: 63944 Columns: 106
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): halftime, name, current_state, final_score, result
## dbl (94): fixture_id, minute, second, 1, 2, X, Accurate Crosses - away, Acc...
## lgl (3): suspended, stopped, ticking
## dttm (4): current_time, half_start_datetime, match_start_datetime, latest_b...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Copy of the data exactly as loaded, taken before `match_data` is reassigned
# by the cleaning steps further down.
match_data_2 <- match_data
# Preview the first six rows.
head(match_data, n = 6L)
## # A tibble: 6 × 106
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19172016 1st-half 2024-08-09 18:11:36 2024-08-09 18:01:37
## 2 19172016 1st-half 2024-08-09 18:27:30 2024-08-09 18:01:37
## 3 19172016 1st-half 2024-08-09 18:28:25 2024-08-09 18:01:37
## 4 19172016 1st-half 2024-08-09 18:29:32 2024-08-09 18:01:37
## 5 19172016 1st-half 2024-08-09 18:30:37 2024-08-09 18:01:37
## 6 19172016 1st-half 2024-08-09 18:31:18 2024-08-09 18:01:37
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
# Number of rows in the loaded data.
dim(match_data)[1L]
## [1] 63944
# Number of columns in the loaded data.
dim(match_data)[2L]
## [1] 106
# Per-column summary (quartiles, class information and NA counts).
summary(object = match_data)
## fixture_id halftime current_time
## Min. :19134453 Length:63944 Min. :2024-08-09 18:11:36.00
## 1st Qu.:19135301 Class :character 1st Qu.:2024-09-01 16:41:41.00
## Median :19139737 Mode :character Median :2024-09-28 16:38:41.00
## Mean :19148005 Mean :2024-09-29 16:21:24.78
## 3rd Qu.:19155126 3rd Qu.:2024-10-26 14:44:19.00
## Max. :19172117 Max. :2024-11-10 21:57:41.00
##
## half_start_datetime match_start_datetime
## Min. :2024-08-09 18:01:37.0 Min. :2024-08-09 18:01:37.00
## 1st Qu.:2024-09-01 16:05:58.0 1st Qu.:2024-09-01 15:30:07.00
## Median :2024-09-28 16:04:35.0 Median :2024-09-28 16:00:55.00
## Mean :2024-09-29 15:55:28.5 Mean :2024-09-29 15:22:04.30
## 3rd Qu.:2024-10-26 14:11:14.0 3rd Qu.:2024-10-26 13:33:01.00
## Max. :2024-11-10 21:06:59.0 Max. :2024-11-10 20:02:05.00
##
## minute second latest_bookmaker_update
## Min. : 0.00 Min. : 0.0 Min. :2024-08-09 18:11:28.00
## 1st Qu.:13.00 1st Qu.:10.0 1st Qu.:2024-09-01 16:40:19.00
## Median :25.00 Median :27.0 Median :2024-09-28 16:38:29.50
## Mean :25.48 Mean :27.7 Mean :2024-09-29 16:20:30.19
## 3rd Qu.:38.00 3rd Qu.:44.0 3rd Qu.:2024-10-26 14:44:08.25
## Max. :80.00 Max. :59.0 Max. :2024-11-10 21:55:36.00
##
## suspended stopped 1 2
## Mode :logical Mode :logical Min. : 1.00 Min. : 1.00
## FALSE:56127 FALSE:59829 1st Qu.: 1.33 1st Qu.: 1.95
## TRUE :7817 TRUE :4115 Median : 2.50 Median : 4.00
## Mean : 18.51 Mean : 29.48
## 3rd Qu.: 5.50 3rd Qu.: 13.00
## Max. :501.00 Max. :501.00
##
## X name ticking Accurate Crosses - away
## Min. : 1.000 Length:63944 Mode:logical Min. : 0.00
## 1st Qu.: 3.000 Class :character TRUE:63944 1st Qu.: 0.00
## Median : 3.750 Mode :character Median : 2.00
## Mean : 8.698 Mean : 2.01
## 3rd Qu.: 7.500 3rd Qu.: 3.00
## Max. :51.000 Max. :12.00
## NA's :3028
## Accurate Crosses - home Assists - away Assists - home Attacks - away
## Min. : 0.000 Min. :0.000 Min. :0.000 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 21.00
## Median : 2.000 Median :1.000 Median :1.000 Median : 43.00
## Mean : 2.631 Mean :0.813 Mean :0.901 Mean : 45.91
## 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.: 67.00
## Max. :14.000 Max. :5.000 Max. :6.000 Max. :180.00
## NA's :3007 NA's :27934 NA's :27931 NA's :39
## Attacks - home Ball Possession % - away Ball Possession % - home
## Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 24.00 1st Qu.: 39.00 1st Qu.: 41.00
## Median : 48.00 Median : 49.00 Median : 51.00
## Mean : 50.89 Mean : 49.02 Mean : 50.98
## 3rd Qu.: 74.00 3rd Qu.: 59.00 3rd Qu.: 61.00
## Max. :178.00 Max. :100.00 Max. :100.00
## NA's :33 NA's :23 NA's :22
## Ball Safe - away Ball Safe - home Challenges - away Challenges - home
## Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 16.00 1st Qu.: 15.00 1st Qu.: 2.000 1st Qu.: 2.000
## Median : 32.00 Median : 31.00 Median : 4.000 Median : 4.000
## Mean : 32.81 Mean : 31.52 Mean : 4.606 Mean : 4.543
## 3rd Qu.: 48.00 3rd Qu.: 45.00 3rd Qu.: 7.000 3rd Qu.: 6.000
## Max. :266.00 Max. :277.00 Max. :21.000 Max. :23.000
## NA's :23428 NA's :23424 NA's :4681 NA's :4674
## Corners - away Corners - home Counter Attacks - away
## Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.00
## Median : 2.000 Median : 2.000 Median : 1.00
## Mean : 2.139 Mean : 2.641 Mean : 1.37
## 3rd Qu.: 3.000 3rd Qu.: 4.000 3rd Qu.: 2.00
## Max. :18.000 Max. :17.000 Max. :10.00
## NA's :74 NA's :66 NA's :41211
## Counter Attacks - home Dangerous Attacks - away Dangerous Attacks - home
## Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.00 1st Qu.: 8.00 1st Qu.: 9.00
## Median : 1.00 Median : 17.00 Median : 21.00
## Mean : 1.58 Mean : 19.91 Mean : 23.81
## 3rd Qu.: 2.00 3rd Qu.: 29.00 3rd Qu.: 34.00
## Max. :13.00 Max. :163.00 Max. :140.00
## NA's :41209 NA's :53 NA's :47
## Dribble Attempts - away Dribble Attempts - home Fouls - away
## Min. : 1.000 Min. : 1.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 3.000
## Median : 7.000 Median : 7.000 Median : 6.000
## Mean : 8.238 Mean : 8.359 Mean : 6.269
## 3rd Qu.:12.000 3rd Qu.:12.000 3rd Qu.: 9.000
## Max. :33.000 Max. :37.000 Max. :28.000
## NA's :7869 NA's :7844 NA's :1627
## Fouls - home Free Kicks - away Free Kicks - home Goal Attempts - away
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 3.000 1st Qu.: 3.00 1st Qu.: 2.00 1st Qu.: 1.00
## Median : 5.000 Median : 6.00 Median : 6.00 Median : 3.00
## Mean : 6.008 Mean : 6.74 Mean : 6.09 Mean : 3.25
## 3rd Qu.: 9.000 3rd Qu.:10.00 3rd Qu.: 9.00 3rd Qu.: 5.00
## Max. :25.000 Max. :27.00 Max. :19.00 Max. :17.00
## NA's :1615 NA's :58465 NA's :58465 NA's :27536
## Goal Attempts - home Goal Kicks - away Goal Kicks - home Goals - away
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. :0.0000
## 1st Qu.: 2.000 1st Qu.: 1.00 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 4.000 Median : 3.00 Median : 3.000 Median :0.0000
## Mean : 4.013 Mean : 4.06 Mean : 3.399 Mean :0.6547
## 3rd Qu.: 6.000 3rd Qu.: 6.00 3rd Qu.: 5.000 3rd Qu.:1.0000
## Max. :24.000 Max. :24.00 Max. :17.000 Max. :6.0000
## NA's :27529 NA's :4685 NA's :4678 NA's :39
## Goals - home Headers - away Headers - home Hit Woodwork - away
## Min. :0.0000 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 5.00 1st Qu.: 6.00 1st Qu.:0.0000
## Median :0.0000 Median :11.00 Median :12.00 Median :0.0000
## Mean :0.7384 Mean :12.73 Mean :13.56 Mean :0.1439
## 3rd Qu.:1.0000 3rd Qu.:18.00 3rd Qu.:20.00 3rd Qu.:0.0000
## Max. :8.0000 Max. :62.00 Max. :72.00 Max. :3.0000
## NA's :36 NA's :10413 NA's :10406 NA's :198
## Hit Woodwork - home Injuries - away Injuries - home Interceptions - away
## Min. :0.0000 Min. :0.00 Min. :0.00 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.00 1st Qu.: 2.000
## Median :0.0000 Median :1.00 Median :1.00 Median : 4.000
## Mean :0.1637 Mean :0.95 Mean :0.79 Mean : 4.628
## 3rd Qu.:0.0000 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.: 7.000
## Max. :3.0000 Max. :6.00 Max. :6.00 Max. :21.000
## NA's :177 NA's :46318 NA's :46316 NA's :4255
## Interceptions - home Key Passes - away Key Passes - home Long Passes - away
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 6.00
## Median : 4.000 Median : 4.000 Median : 5.000 Median :11.00
## Mean : 4.569 Mean : 4.372 Mean : 5.356 Mean :11.71
## 3rd Qu.: 6.000 3rd Qu.: 6.000 3rd Qu.: 8.000 3rd Qu.:17.00
## Max. :23.000 Max. :20.000 Max. :26.000 Max. :53.00
## NA's :4234 NA's :5517 NA's :5513 NA's :5525
## Long Passes - home Offsides - away Offsides - home Passes - away
## Min. : 0.00 Min. :0.000 Min. : 0.000 Min. : 0.0
## 1st Qu.: 6.00 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.:105.0
## Median :11.00 Median :1.000 Median : 1.000 Median :208.0
## Mean :12.12 Mean :1.158 Mean : 1.416 Mean :221.6
## 3rd Qu.:17.00 3rd Qu.:2.000 3rd Qu.: 2.000 3rd Qu.:316.0
## Max. :47.00 Max. :8.000 Max. :11.000 Max. :912.0
## NA's :5514 NA's :17877 NA's :17872 NA's :292
## Passes - home Penalties - away Penalties - home Redcards - away
## Min. : 0.0 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:110.0 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :215.0 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :228.4 Mean :0.07503 Mean :0.09525 Mean :0.03389
## 3rd Qu.:328.0 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :860.0 Max. :3.00000 Max. :2.00000 Max. :2.00000
## NA's :277 NA's :79 NA's :72 NA's :65
## Redcards - home Saves - away Saves - home Score Change - away
## Min. :0.00000 Min. : 0.000 Min. :0.000 Min. :-1.00000
## 1st Qu.:0.00000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.: 0.00000
## Median :0.00000 Median : 2.000 Median :1.000 Median : 0.00000
## Mean :0.03573 Mean : 1.962 Mean :1.541 Mean : 0.01317
## 3rd Qu.:0.00000 3rd Qu.: 3.000 3rd Qu.:2.000 3rd Qu.: 0.00000
## Max. :1.00000 Max. :13.000 Max. :8.000 Max. : 2.00000
## NA's :55 NA's :10769 NA's :10758
## Score Change - home Shots Blocked - away Shots Blocked - home
## Min. :-1.00000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.00000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.00000 Median : 1.000 Median : 1.000
## Mean : 0.01551 Mean : 1.505 Mean : 1.794
## 3rd Qu.: 0.00000 3rd Qu.: 2.000 3rd Qu.: 3.000
## Max. : 2.00000 Max. :13.000 Max. :13.000
## NA's :236 NA's :223
## Shots Insidebox - away Shots Insidebox - home Shots Off Target - away
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.00 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 3.00 Median : 3.000 Median : 2.000
## Mean : 3.37 Mean : 4.282 Mean : 2.095
## 3rd Qu.: 5.00 3rd Qu.: 6.000 3rd Qu.: 3.000
## Max. :19.00 Max. :27.000 Max. :12.000
## NA's :267 NA's :247 NA's :36
## Shots Off Target - home Shots On Target - away Shots On Target - home
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.: 1.000
## Median : 2.000 Median : 1.000 Median : 2.000
## Mean : 2.563 Mean : 1.914 Mean : 2.329
## 3rd Qu.: 4.000 3rd Qu.: 3.000 3rd Qu.: 4.000
## Max. :17.000 Max. :12.000 Max. :15.000
## NA's :28 NA's :35 NA's :28
## Shots Outsidebox - away Shots Outsidebox - home Shots Total - away
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 2.000
## Median : 1.000 Median : 2.000 Median : 5.000
## Mean : 1.918 Mean : 2.142 Mean : 5.417
## 3rd Qu.: 3.000 3rd Qu.: 3.000 3rd Qu.: 8.000
## Max. :15.000 Max. :18.000 Max. :26.000
## NA's :272 NA's :248
## Shots Total - home Substitutions - away Substitutions - home
## Min. : 0.000 Min. :0.000 Min. :0.00
## 1st Qu.: 2.000 1st Qu.:0.000 1st Qu.:0.00
## Median : 6.000 Median :0.000 Median :0.00
## Mean : 6.585 Mean :1.272 Mean :1.19
## 3rd Qu.:10.000 3rd Qu.:2.000 3rd Qu.:2.00
## Max. :35.000 Max. :8.000 Max. :9.00
## NA's :85 NA's :75
## Successful Dribbles - away Successful Dribbles - home
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 3.000 Median : 3.000
## Mean : 3.529 Mean : 3.622
## 3rd Qu.: 5.000 3rd Qu.: 5.000
## Max. :20.000 Max. :21.000
## NA's :5323 NA's :5306
## Successful Headers - away Successful Headers - home
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 3.00 1st Qu.: 3.000
## Median : 6.00 Median : 6.000
## Mean : 6.55 Mean : 6.638
## 3rd Qu.: 9.00 3rd Qu.:10.000
## Max. :31.00 Max. :34.000
## NA's :10429 NA's :10418
## Successful Interceptions - away Successful Interceptions - home
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 3.000
## Median : 7.000 Median : 7.000
## Mean : 7.744 Mean : 7.841
## 3rd Qu.:11.000 3rd Qu.:11.000
## Max. :31.000 Max. :33.000
## NA's :2676 NA's :2670
## Successful Passes - away Successful Passes - home
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 84.0 1st Qu.: 89.0
## Median :165.0 Median :175.0
## Mean :183.5 Mean :191.1
## 3rd Qu.:261.0 3rd Qu.:275.0
## Max. :857.0 Max. :792.0
## NA's :391 NA's :389
## Successful Passes Percentage - away Successful Passes Percentage - home
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 76.00 1st Qu.: 77.00
## Median : 81.00 Median : 83.00
## Mean : 79.45 Mean : 80.52
## 3rd Qu.: 86.00 3rd Qu.: 87.00
## Max. :100.00 Max. :100.00
## NA's :163 NA's :153
## Tackles - away Tackles - home Throwins - away Throwins - home
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.000
## Median : 8.000 Median : 8.000 Median : 8.000 Median : 8.000
## Mean : 8.411 Mean : 8.614 Mean : 8.751 Mean : 9.091
## 3rd Qu.:12.000 3rd Qu.:13.000 3rd Qu.:13.000 3rd Qu.:13.000
## Max. :33.000 Max. :33.000 Max. :34.000 Max. :45.000
## NA's :1960 NA's :1949 NA's :1133 NA's :1126
## Total Crosses - away Total Crosses - home Yellowcards - away
## Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.: 3.000 1st Qu.: 4.000 1st Qu.:0.0000
## Median : 7.000 Median : 9.000 Median :1.0000
## Mean : 7.767 Mean : 9.852 Mean :0.9116
## 3rd Qu.:11.000 3rd Qu.:14.000 3rd Qu.:1.0000
## Max. :45.000 Max. :46.000 Max. :9.0000
## NA's :2215 NA's :2204 NA's :80
## Yellowcards - home Yellowred Cards - away Yellowred Cards - home
## Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.0000 Median :0.000 Median :0.000
## Mean :0.8026 Mean :0.012 Mean :0.006
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :7.0000 Max. :2.000 Max. :1.000
## NA's :69 NA's :23244 NA's :23237
## current_state final_score result
## Length:63944 Length:63944 Length:63944
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
# Print every column name on its own line.
for (col in names(match_data)) {
  print(col)
}
## [1] "fixture_id"
## [1] "halftime"
## [1] "current_time"
## [1] "half_start_datetime"
## [1] "match_start_datetime"
## [1] "minute"
## [1] "second"
## [1] "latest_bookmaker_update"
## [1] "suspended"
## [1] "stopped"
## [1] "1"
## [1] "2"
## [1] "X"
## [1] "name"
## [1] "ticking"
## [1] "Accurate Crosses - away"
## [1] "Accurate Crosses - home"
## [1] "Assists - away"
## [1] "Assists - home"
## [1] "Attacks - away"
## [1] "Attacks - home"
## [1] "Ball Possession % - away"
## [1] "Ball Possession % - home"
## [1] "Ball Safe - away"
## [1] "Ball Safe - home"
## [1] "Challenges - away"
## [1] "Challenges - home"
## [1] "Corners - away"
## [1] "Corners - home"
## [1] "Counter Attacks - away"
## [1] "Counter Attacks - home"
## [1] "Dangerous Attacks - away"
## [1] "Dangerous Attacks - home"
## [1] "Dribble Attempts - away"
## [1] "Dribble Attempts - home"
## [1] "Fouls - away"
## [1] "Fouls - home"
## [1] "Free Kicks - away"
## [1] "Free Kicks - home"
## [1] "Goal Attempts - away"
## [1] "Goal Attempts - home"
## [1] "Goal Kicks - away"
## [1] "Goal Kicks - home"
## [1] "Goals - away"
## [1] "Goals - home"
## [1] "Headers - away"
## [1] "Headers - home"
## [1] "Hit Woodwork - away"
## [1] "Hit Woodwork - home"
## [1] "Injuries - away"
## [1] "Injuries - home"
## [1] "Interceptions - away"
## [1] "Interceptions - home"
## [1] "Key Passes - away"
## [1] "Key Passes - home"
## [1] "Long Passes - away"
## [1] "Long Passes - home"
## [1] "Offsides - away"
## [1] "Offsides - home"
## [1] "Passes - away"
## [1] "Passes - home"
## [1] "Penalties - away"
## [1] "Penalties - home"
## [1] "Redcards - away"
## [1] "Redcards - home"
## [1] "Saves - away"
## [1] "Saves - home"
## [1] "Score Change - away"
## [1] "Score Change - home"
## [1] "Shots Blocked - away"
## [1] "Shots Blocked - home"
## [1] "Shots Insidebox - away"
## [1] "Shots Insidebox - home"
## [1] "Shots Off Target - away"
## [1] "Shots Off Target - home"
## [1] "Shots On Target - away"
## [1] "Shots On Target - home"
## [1] "Shots Outsidebox - away"
## [1] "Shots Outsidebox - home"
## [1] "Shots Total - away"
## [1] "Shots Total - home"
## [1] "Substitutions - away"
## [1] "Substitutions - home"
## [1] "Successful Dribbles - away"
## [1] "Successful Dribbles - home"
## [1] "Successful Headers - away"
## [1] "Successful Headers - home"
## [1] "Successful Interceptions - away"
## [1] "Successful Interceptions - home"
## [1] "Successful Passes - away"
## [1] "Successful Passes - home"
## [1] "Successful Passes Percentage - away"
## [1] "Successful Passes Percentage - home"
## [1] "Tackles - away"
## [1] "Tackles - home"
## [1] "Throwins - away"
## [1] "Throwins - home"
## [1] "Total Crosses - away"
## [1] "Total Crosses - home"
## [1] "Yellowcards - away"
## [1] "Yellowcards - home"
## [1] "Yellowred Cards - away"
## [1] "Yellowred Cards - home"
## [1] "current_state"
## [1] "final_score"
## [1] "result"
To inspect how much data was recorded for each match, the rows are grouped by fixture_id and the number of rows in each group is counted and stored in the minute_count variable (each row is one recorded snapshot of a match, so this count approximates the number of recorded minutes). The summary table is then sorted in ascending order of minute_count. This makes it easy to compare matches by the amount of data recorded for them and to spot matches with unusually few records.
# One row per fixture holding the number of rows recorded for it, sorted so the
# fixtures with the fewest rows come first.
match_data_summary <- match_data %>%
  count(fixture_id, name = "minute_count") %>%
  arrange(minute_count)
print(match_data_summary)
## # A tibble: 648 × 2
## fixture_id minute_count
## <dbl> <int>
## 1 19172016 39
## 2 19172012 47
## 3 19172089 52
## 4 19172085 54
## 5 19172014 70
## 6 19155091 76
## 7 19172043 81
## 8 19139670 84
## 9 19172044 85
## 10 19155095 86
## # ℹ 638 more rows
The main purpose of this code is to clean the data by removing
duplicate rows from the match_data dataset. First, it sorts
the data by the fixture_id and
current_time columns; then it removes every row that repeats
the fixture_id and current_time of an earlier row. Finally, it reports the number of deleted rows.
# Order the rows chronologically within each fixture. The data is a tibble, so
# there are no row names to reset afterwards.
match_data <- match_data %>%
  arrange(fixture_id, current_time)
before_removal <- nrow(match_data)
# Keep only the first row of every (fixture_id, current_time) pair.
match_data <- match_data %>%
  distinct(fixture_id, current_time, .keep_all = TRUE)
after_removal <- nrow(match_data)
cat("Amount of deleted rows:", before_removal - after_removal, "\n")
## Amount of deleted rows: 0
# Preview the first rows of the sorted, de-duplicated data.
match_data %>%
  head() %>%
  print()
## # A tibble: 6 × 106
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
# Show the tibble (first ten rows plus the column overview).
print(match_data)
## # A tibble: 63,944 × 106
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## 7 19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
## 8 19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
## 9 19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10 19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 63,934 more rows
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
# Row count after duplicate removal.
dim(match_data)[1L]
## [1] 63944
# Frequency of each value of the `suspended` flag.
table(match_data[["suspended"]])
##
## FALSE TRUE
## 56127 7817
# Frequency of each value of the `stopped` flag.
table(match_data[["stopped"]])
##
## FALSE TRUE
## 59829 4115
The main purpose of this code is to clean the missing and erroneous
data in the match_data dataset. First, only the rows in which both
suspended and stopped are
FALSE are kept. Then, the values in the critical
columns are converted to the appropriate numeric and date-time formats, and
values that cannot be converted become NA. Finally, the rows with a missing
(NA) value in any of the critical columns are
removed from the dataset. The number of rows remaining after each cleaning
step is reported to the user.
# Report the row count before any cleaning step is applied.
cat("Amount of rows before data cleaning:", dim(match_data)[1L], "\n")
## Amount of rows before data cleaning: 63944
# Keep only the rows in which neither flag is set; rows where the condition
# evaluates to NA are dropped as well.
match_data <- match_data %>%
  filter(!suspended, !stopped)
cat("Amount of rows after data cleaning 1:", dim(match_data)[1L], "\n")
## Amount of rows after data cleaning 1: 56127
# Columns that must be present for a row to be usable in the analysis.
critical_columns <- c("current_time", "half_start_datetime", "1", "X", "2")
match_data <- match_data %>%
  mutate(
    # Odds columns: force numeric; values that cannot be parsed become NA and
    # the resulting coercion warnings are silenced.
    across(c(`1`, X, `2`), ~ suppressWarnings(as.numeric(.x))),
    # Timestamp columns: force POSIXct in UTC.
    across(
      c(current_time, half_start_datetime, latest_bookmaker_update),
      ~ as.POSIXct(.x, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    )
  ) %>%
  # Drop every row that has a missing value in any critical column.
  filter(if_all(all_of(critical_columns), ~ !is.na(.x)))
cat("Amount of rows after data cleaning 2:", nrow(match_data), "\n")
## Amount of rows after data cleaning 2: 56127
# Preview the first rows of the cleaned data.
match_data %>%
  head() %>%
  print()
## # A tibble: 6 × 106
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
TASK 1 TASK 1.1 and TASK 1.2
# Tasks 1.1 / 1.2: turn the 1 / X / 2 odds into outcome probabilities.
# `mutate()` evaluates its arguments in order, so later columns can use the
# ones defined just above them.
match_data <- match_data %>%
  mutate(
    # Sum of the inverse odds of the three outcomes.
    Total_odds = (1 / `1`) + (1 / `2`) + (1 / `X`),
    # Inverse odds divided by their sum, so the three values add up to 1.
    P_home = (1 / `1`) / Total_odds,
    P_away = (1 / `2`) / Total_odds,
    P_draw = (1 / `X`) / Total_odds,
    P_home_minus_P_away = P_home - P_away,
    # Explicit normalisation step: each probability divided by their total.
    total_prob = P_home + P_draw + P_away,
    P_home_norm = (P_home / total_prob),
    P_draw_norm = (P_draw / total_prob),
    P_away_norm = (P_away / total_prob)
  )
# Split the rows by the half they were recorded in.
first_half <- filter(match_data, halftime == "1st-half")
second_half <- filter(match_data, halftime == "2nd-half")
# Preview the first-half rows.
head(first_half, n = 6L)
## # A tibble: 6 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
# Preview the second-half rows.
head(second_half, n = 6L)
## # A tibble: 6 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 2nd-half 2024-08-16 20:04:18 2024-08-16 20:02:38
## 2 19134453 2nd-half 2024-08-16 20:05:18 2024-08-16 20:02:38
## 3 19134453 2nd-half 2024-08-16 20:06:18 2024-08-16 20:02:38
## 4 19134453 2nd-half 2024-08-16 20:07:19 2024-08-16 20:02:38
## 5 19134453 2nd-half 2024-08-16 20:08:18 2024-08-16 20:02:38
## 6 19134453 2nd-half 2024-08-16 20:09:18 2024-08-16 20:02:38
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
# Count the rows whose normalised probabilities add up to more than 1.001.
#
# `data` must provide the columns P_home_norm, P_draw_norm and P_away_norm.
# Returns a single integer: the number of rows exceeding the threshold.
check_normalization_count <- function(data) {
  prob_cols <- c("P_home_norm", "P_draw_norm", "P_away_norm")
  # Add the columns in the order home + draw + away.
  row_totals <- Reduce(`+`, lapply(prob_cols, function(nm) data[[nm]]))
  sum(row_totals > 1.001)
}
# Number of rows per half whose normalised probabilities sum to more than 1.001.
first_half_issues_count <- first_half %>% check_normalization_count()
second_half_issues_count <- second_half %>% check_normalization_count()
TASK 1.3 - First Half
# Keep only the rows that have both the home-win and the away-win odds.
first_half <- first_half %>%
  filter(!is.na(`1`), !is.na(`2`))
# Scatter of the bookmaker's draw probability against P(home) - P(away).
ggplot(data = first_half, mapping = aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(color = "blue", alpha = 0.5) +
  ggtitle("First Half - P(Home Win) - P(Away Win) vs P(Draw)") +
  xlab("P(Home Win) - P(Away Win)") +
  ylab("P(Draw)") +
  theme_minimal()
# Fixed-width bins over the full range of P(home) - P(away).
bins <- seq(from = -1, to = 1, by = 0.2)
# Per bin: number of rows (`total_games` counts rows, not distinct fixtures),
# rows whose match result is "X", the resulting draw share, and the mean
# bookmaker draw probability.
binned_data <- first_half %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  filter(!is.na(bin)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),
    draws = sum(result == "X"),
    empirical_P_tie = draws / total_games,
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)
  )
# Red bars: observed draw share. Blue dots and dashed line: bookmaker average.
ggplot(data = binned_data, mapping = aes(x = bin)) +
  geom_col(aes(y = empirical_P_tie), fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  ggtitle("First Half - Empirical vs Bookmaker P(Draw) by Bins") +
  xlab("P(Home Win) - P(Away Win) Bins") +
  ylab("Probability of Draw") +
  theme_minimal()
# Task 1.3 (first half): quadratic trend of the bookmaker's draw probability
# compared with the quadratic trend of the observed draw share per bin.
num_bins <- 20

# Bookmaker trend: quadratic fit of P(draw) on P(home) - P(away).
coeffs_1st_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = first_half)
first_half$predicted_draw <- predict(coeffs_1st_half, newdata = first_half)

# Equal-width bins spanning the observed range of P(home) - P(away).
breaks <- seq(
  min(first_half$P_home_minus_P_away),
  max(first_half$P_home_minus_P_away),
  length.out = num_bins + 1
)
first_half$P_home_minus_P_away_bin <- cut(
  first_half$P_home_minus_P_away,
  breaks = breaks,
  include.lowest = TRUE
)
# Midpoint of every bin, in the order of the factor levels produced by cut().
bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

# Share of rows per bin whose match ended in a draw. The summary only has rows
# for bins that actually contain data, so each centre is looked up through the
# bin's level index instead of being attached by position; attaching the full
# `bin_centers` vector fails (or misaligns) as soon as one bin is empty.
actual_probabilities_first <- first_half %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE)) %>%
  filter(!is.na(P_home_minus_P_away_bin)) %>%
  mutate(bin_center = bin_centers[as.integer(P_home_minus_P_away_bin)])

# Observed trend: quadratic fit on the table's own `bin_center` column, so the
# model and its predictions no longer depend on a global vector.
coeffs_1st_half_actual <- lm(
  probability_of_draw ~ poly(bin_center, 2),
  data = actual_probabilities_first
)
predicted_probabilities <- predict(
  coeffs_1st_half_actual,
  newdata = actual_probabilities_first
)
actual_probabilities_first$predicted_probability <- predicted_probabilities

ggplot(first_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") + # Bookmaker probabilities
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", linewidth = 1) + # Bookmaker trend line
  geom_point(data = actual_probabilities_first, aes(x = bin_center, y = probability_of_draw), color = "green", size = 3) +
  geom_line(data = actual_probabilities_first, aes(x = bin_center, y = predicted_probability), color = "orange", linewidth = 1) +
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (1st Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
TASK 1.3 - Second Half
# Keep only the rows that have both the home-win and the away-win odds.
second_half <- second_half %>%
  filter(!is.na(`1`), !is.na(`2`))
# Scatter of the bookmaker's draw probability against P(home) - P(away).
ggplot(data = second_half, mapping = aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(color = "blue", alpha = 0.5) +
  ggtitle("Second Half - P(Home Win) - P(Away Win) vs P(Draw)") +
  xlab("P(Home Win) - P(Away Win)") +
  ylab("P(Draw)") +
  theme_minimal()
# Fixed-width bins over the full range of P(home) - P(away).
bins <- seq(from = -1, to = 1, by = 0.2)
# Per bin: number of rows (`total_games` counts rows, not distinct fixtures),
# rows whose match result is "X", the resulting draw share, and the mean
# bookmaker draw probability.
binned_data <- second_half %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  filter(!is.na(bin)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),
    draws = sum(result == "X"),
    empirical_P_tie = draws / total_games,
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)
  )
# Red bars: observed draw share. Blue dots and dashed line: bookmaker average.
ggplot(data = binned_data, mapping = aes(x = bin)) +
  geom_col(aes(y = empirical_P_tie), fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  ggtitle("Second Half - Empirical vs Bookmaker P(Draw) by Bins") +
  xlab("P(Home Win) - P(Away Win) Bins") +
  ylab("Probability of Draw") +
  theme_minimal()
The blue dots on the graph (the average draw probability implied by the
bookmaker's odds) lie either below or above the red bars (the observed
draw frequency, empirical P(tie)), indicating that the bookmaker's draw
probabilities are lower or higher than the observed ones. If a blue dot
is below the red bar, the draw probability given by the bookmaker is
lower than the one observed in the actual data. In this case, betting on
Draw can be profitable in the long run, because the bookmaker gives a
lower probability, while the actual probability is higher. However,
there are also cases where the blue dots are above the red bars. This
indicates that the draw probability given by the bookmaker is higher
than the actual probability, and in such a case, betting on Draw may not
be profitable in the long run. Such analyses provide an important tool
for identifying opportunities in betting strategies. The lower number of
blue dots above the red bars in the second half suggests that the draw
odds were more consistent and predictable. This could mean that the
bookmaker's predictions were more accurate in the second half and that
the outcome of the match was less uncertain than in the first half. This
consistency could explain why fewer blue dots sit above the red bars.
# Task 1.3 (second half): quadratic trend of the bookmaker's draw probability
# compared with the quadratic trend of the observed draw share per bin.
num_bins <- 20

# Bookmaker trend: quadratic fit of P(draw) on P(home) - P(away).
coeffs_2nd_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = second_half)
second_half$predicted_draw <- predict(coeffs_2nd_half, newdata = second_half)

# Equal-width bins spanning the observed range of P(home) - P(away).
breaks <- seq(
  min(second_half$P_home_minus_P_away),
  max(second_half$P_home_minus_P_away),
  length.out = num_bins + 1
)
second_half$P_home_minus_P_away_bin <- cut(
  second_half$P_home_minus_P_away,
  breaks = breaks,
  include.lowest = TRUE
)
# Midpoint of every bin, in the order of the factor levels produced by cut().
bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

# Share of rows per bin whose match ended in a draw. The summary only has rows
# for bins that actually contain data, so each centre is looked up through the
# bin's level index instead of being attached by position; attaching the full
# `bin_centers` vector fails (or misaligns) as soon as one bin is empty.
actual_probabilities_second <- second_half %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE)) %>%
  filter(!is.na(P_home_minus_P_away_bin)) %>%
  mutate(bin_center = bin_centers[as.integer(P_home_minus_P_away_bin)])

# Observed trend: quadratic fit on the table's own `bin_center` column, so the
# model and its predictions no longer depend on a global vector.
coeffs_2nd_half_actual <- lm(
  probability_of_draw ~ poly(bin_center, 2),
  data = actual_probabilities_second
)
predicted_probabilities <- predict(
  coeffs_2nd_half_actual,
  newdata = actual_probabilities_second
)
actual_probabilities_second$predicted_probability <- predicted_probabilities

ggplot(second_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") + # Bookmaker probabilities
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", linewidth = 1) + # Bookmaker trend line
  geom_point(data = actual_probabilities_second, aes(x = bin_center, y = probability_of_draw), color = "green", size = 3) +
  geom_line(data = actual_probabilities_second, aes(x = bin_center, y = predicted_probability), color = "orange", linewidth = 1) +
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (2nd Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()
TASK 2 The code excludes matches in which a goal was scored after the 90th minute or a red card was shown before the 10th minute. First, it compares the scores recorded before and after the 90th minute and identifies the matches in which they differ. It then identifies the matches with an early red card. The union of these two sets of matches is removed from match_data, and the remaining rows are stored in the match_data_special data frame. Finally, the number of rows removed along with the excluded matches is reported.
# Exclude matches with a goal after the 90th minute.
# NOTE(review): this assumes `minute` counts from kick-off. If `minute`
# restarts at every half, the `> 90` / `<= 90` thresholds never isolate
# stoppage time and must become a 2nd-half condition -- confirm against the data.
events_after_90 <- match_data %>%
  filter(minute > 90)

# Score at the last observation at or before minute 90: ONE row per fixture.
# Using every distinct earlier score state instead would pair e.g. 0-0 with a
# later 1-0 and flag any match that had a goal at any time.
goals_before_90 <- match_data %>%
  filter(
    minute <= 90,
    !is.na(`Goals - home`),
    !is.na(`Goals - away`)
  ) %>%
  group_by(fixture_id) %>%
  slice_max(current_time, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(fixture_id, `Goals - home`, `Goals - away`)

# Every distinct score state observed after minute 90.
goals_after_90 <- events_after_90 %>%
  select(fixture_id, `Goals - home`, `Goals - away`) %>%
  distinct()

# Fixtures whose score after minute 90 differs from the score at minute 90.
matches_with_diff_goals <- goals_after_90 %>%
  inner_join(goals_before_90, by = "fixture_id", suffix = c("_after_90", "_before_90")) %>%
  filter(`Goals - away_after_90` != `Goals - away_before_90` |
    `Goals - home_after_90` != `Goals - home_before_90`) %>%
  pull(fixture_id) %>%
  unique()

# Exclude matches with a red card before the 10th minute.
# Restricting to the first half is a no-op if `minute` counts from kick-off and
# prevents matching the first minutes of the second half if it does not.
early_red_card_matches <- match_data %>%
  filter(
    halftime == "1st-half",
    minute < 10,
    (`Redcards - home` > 0 | `Redcards - away` > 0)
  ) %>%
  pull(fixture_id) %>%
  unique()

# Drop every row that belongs to an excluded fixture.
exclude_matches <- union(matches_with_diff_goals, early_red_card_matches)
match_data_special <- match_data %>%
  filter(!fixture_id %in% exclude_matches)

# Despite its name, `removed_matches` is a ROW count (rows dropped), not a
# count of fixtures.
removed_matches <- nrow(match_data) - nrow(match_data_special)
cat("Amount of deleted rows: ", removed_matches, "\n")
## Amount of deleted rows: 3280
# Print a preview of the data set that remains after excluding the matches.
match_data_special
## # A tibble: 52,847 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## 7 19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
## 8 19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
## 9 19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10 19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 52,837 more rows
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
# Split the filtered data by the value of `halftime`, then preview the
# first-half subset.
second_half_special <- filter(match_data_special, halftime == "2nd-half")
first_half_special <- filter(match_data_special, halftime == "1st-half")
head(first_half_special, n = 6)
## # A tibble: 6 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
# Preview the first rows of the second-half subset.
head(second_half_special)
## # A tibble: 6 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 2nd-half 2024-08-16 20:04:18 2024-08-16 20:02:38
## 2 19134453 2nd-half 2024-08-16 20:05:18 2024-08-16 20:02:38
## 3 19134453 2nd-half 2024-08-16 20:06:18 2024-08-16 20:02:38
## 4 19134453 2nd-half 2024-08-16 20:07:19 2024-08-16 20:02:38
## 5 19134453 2nd-half 2024-08-16 20:08:18 2024-08-16 20:02:38
## 6 19134453 2nd-half 2024-08-16 20:09:18 2024-08-16 20:02:38
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## # `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
After specialization, 1st half
# First half (filtered data): scatter of P(draw) against P(home) - P(away).
ggplot(
  data = first_half_special,
  mapping = aes(x = P_home_minus_P_away, y = P_draw)
) +
  geom_point(color = "blue", alpha = 0.5) +
  ggtitle("First Half - P(Home Win) - P(Away Win) vs P(Draw)") +
  xlab("P(Home Win) - P(Away Win)") +
  ylab("P(Draw)") +
  theme_minimal()
# First half (filtered data): per-bin share of rows whose match ended in a
# draw, next to the mean bookmaker P(draw) of the same bin.
# Bins of width 0.2 on [-1, 1].
bins <- seq(-1, 1, by = 0.2)

first_half_binned <- mutate(
  first_half_special,
  bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)
)
binned_data <- first_half_binned %>%
  group_by(bin) %>%
  summarise(
    total_games = n(), # number of rows in the bin
    draws = sum(result == "X"),
    empirical_P_tie = draws / total_games,
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)
  ) %>%
  filter(!is.na(bin)) # drop rows that fell outside the bins

# Bars: empirical draw share; dashed line and points: mean bookmaker P(draw).
ggplot(binned_data, aes(x = bin)) +
  geom_col(aes(y = empirical_P_tie), fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(
    aes(y = avg_bookmaker_P_tie, group = 1),
    color = "blue",
    linetype = "dashed"
  ) +
  labs(
    title = "First Half - Empirical vs Bookmaker P(Draw) by Bins",
    x = "P(Home Win) - P(Away Win) Bins",
    y = "Probability of Draw"
  ) +
  theme_minimal()
# First half (filtered data): compare the bookmaker's implied P(draw) with the
# empirical draw frequency, both as a function of P(home) - P(away).
num_bins <- 20

# Quadratic trend of the bookmaker's P(draw).
coeffs_1st_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = first_half_special)
first_half_special$predicted_draw <- predict(coeffs_1st_half, newdata = first_half_special)

# Equal-width bins spanning the observed range of P(home) - P(away).
breaks <- seq(
  min(first_half_special$P_home_minus_P_away, na.rm = TRUE),
  max(first_half_special$P_home_minus_P_away, na.rm = TRUE),
  length.out = num_bins + 1
)
first_half_special$P_home_minus_P_away_bin <- cut(
  first_half_special$P_home_minus_P_away,
  breaks = breaks,
  include.lowest = TRUE
)
bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

# Share of rows per bin whose match ended in a draw.
# The bin centre is looked up through the factor level of each bin, so the
# centres stay aligned even when some bins are empty (group_by() drops empty
# levels, which would otherwise shift or break a positional assignment).
actual_probabilities_first <- first_half_special %>%
  filter(!is.na(P_home_minus_P_away_bin)) %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE)) %>%
  mutate(bin_center = bin_centers[as.integer(P_home_minus_P_away_bin)])

# Quadratic trend of the empirical draw frequency. The model is fitted on the
# `bin_center` column (not on a free-standing vector) so that predict() really
# evaluates `newdata`.
coeffs_1st_half_actual <- lm(
  probability_of_draw ~ poly(bin_center, 2),
  data = actual_probabilities_first
)
actual_probabilities_first$predicted_probability <- predict(
  coeffs_1st_half_actual,
  newdata = actual_probabilities_first
)
# Kept as a free-standing vector for code that still refers to it by name.
predicted_probabilities <- actual_probabilities_first$predicted_probability

ggplot(first_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") + # bookmaker probabilities
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) + # bookmaker trend line
  geom_point(
    data = actual_probabilities_first,
    aes(x = bin_center, y = probability_of_draw),
    color = "green", size = 3
  ) + # empirical draw frequency per bin
  geom_line(
    data = actual_probabilities_first,
    aes(x = bin_center, y = predicted_probability),
    color = "orange", size = 1
  ) + # empirical trend line
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (1st Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()
After removing the matches that fit these cases (a red card in the
first 10 minutes of the game, or a goal scored by either team after the
90th minute), we see that the model fits the data better.
After specialization, 2nd half
# Second half (filtered data): scatter of P(draw) against P(home) - P(away).
ggplot(
  data = second_half_special,
  mapping = aes(x = P_home_minus_P_away, y = P_draw)
) +
  geom_point(color = "blue", alpha = 0.5) +
  ggtitle("Second Half - P(Home Win) - P(Away Win) vs P(Draw)") +
  xlab("P(Home Win) - P(Away Win)") +
  ylab("P(Draw)") +
  theme_minimal()
# Second half (filtered data): per-bin share of rows whose match ended in a
# draw, next to the mean bookmaker P(draw) of the same bin.
# Bins of width 0.2 on [-1, 1].
bins <- seq(-1, 1, by = 0.2)

second_half_binned <- mutate(
  second_half_special,
  bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)
)
binned_data <- second_half_binned %>%
  group_by(bin) %>%
  summarise(
    total_games = n(), # number of rows in the bin
    draws = sum(result == "X"),
    empirical_P_tie = draws / total_games,
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)
  ) %>%
  filter(!is.na(bin)) # drop rows that fell outside the bins

# Bars: empirical draw share; dashed line and points: mean bookmaker P(draw).
ggplot(binned_data, aes(x = bin)) +
  geom_col(aes(y = empirical_P_tie), fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(
    aes(y = avg_bookmaker_P_tie, group = 1),
    color = "blue",
    linetype = "dashed"
  ) +
  labs(
    title = "Second Half - Empirical vs Bookmaker P(Draw) by Bins",
    x = "P(Home Win) - P(Away Win) Bins",
    y = "Probability of Draw"
  ) +
  theme_minimal()
# Second half (filtered data): compare the bookmaker's implied P(draw) with
# the empirical draw frequency, both as a function of P(home) - P(away).
num_bins <- 20

# Quadratic trend of the bookmaker's P(draw).
coeffs_2nd_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = second_half_special)
second_half_special$predicted_draw <- predict(coeffs_2nd_half, newdata = second_half_special)

# Equal-width bins spanning the observed range of P(home) - P(away).
breaks <- seq(
  min(second_half_special$P_home_minus_P_away, na.rm = TRUE),
  max(second_half_special$P_home_minus_P_away, na.rm = TRUE),
  length.out = num_bins + 1
)
second_half_special$P_home_minus_P_away_bin <- cut(
  second_half_special$P_home_minus_P_away,
  breaks = breaks,
  include.lowest = TRUE
)
bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

# Share of rows per bin whose match ended in a draw.
# The bin centre is looked up through the factor level of each bin, so the
# centres stay aligned even when some bins are empty (group_by() drops empty
# levels, which would otherwise shift or break a positional assignment).
actual_probabilities_second <- second_half_special %>%
  filter(!is.na(P_home_minus_P_away_bin)) %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE)) %>%
  mutate(bin_center = bin_centers[as.integer(P_home_minus_P_away_bin)])

# Quadratic trend of the empirical draw frequency. The model is fitted on the
# `bin_center` column (not on a free-standing vector) so that predict() really
# evaluates `newdata`.
coeffs_2nd_half_actual <- lm(
  probability_of_draw ~ poly(bin_center, 2),
  data = actual_probabilities_second
)
actual_probabilities_second$predicted_probability <- predict(
  coeffs_2nd_half_actual,
  newdata = actual_probabilities_second
)
# Kept as a free-standing vector for code that still refers to it by name.
predicted_probabilities <- actual_probabilities_second$predicted_probability

ggplot(second_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") + # bookmaker probabilities
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) + # bookmaker trend line
  geom_point(
    data = actual_probabilities_second,
    aes(x = bin_center, y = probability_of_draw),
    color = "green", size = 3
  ) + # empirical draw frequency per bin
  geom_line(
    data = actual_probabilities_second,
    aes(x = bin_center, y = predicted_probability),
    color = "orange", size = 1
  ) + # empirical trend line
  labs(
    title = "(2nd Half) | P(Home Win) - P(Away Win) vs P(Draw) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()
After removing the matches that fit these cases (a red card in the
first 10 minutes of the game, or a goal scored by either team after the
90th minute), we see that the model fits the data better.
TASK 3 Before starting the analysis, I looked at the correlations between the variables, listing the pairs whose absolute correlation exceeds 0.85, to get a general overview. We see a correlation of 1 or -1 between some variables (such as Ball Possession % - home and Ball Possession % - away). Since such variables are directly related to each other, I decided not to attach importance to these high correlations in the analysis.
# Forward fill within each match (group: fixture_id).
library(dplyr)
# A downward fill copies the previous row's value into a missing cell, so the
# rows of each fixture must be in chronological order first. Sorting by
# `current_time` within `fixture_id` makes that explicit instead of relying on
# the order in which the rows happen to be stored.
match_data_special <- match_data_special %>%
  arrange(fixture_id, current_time) %>%
  group_by(fixture_id) %>%
  tidyr::fill(everything(), .direction = "down") %>%
  ungroup()
# Count the missing values in each column and print the counts.
na_per_column <- colSums(is.na(match_data_special))
cat("NA sayısı:\n")
## NA sayısı:
print(na_per_column)
## fixture_id halftime
## 0 0
## current_time half_start_datetime
## 0 0
## match_start_datetime minute
## 0 0
## second latest_bookmaker_update
## 0 0
## suspended stopped
## 0 0
## 1 2
## 0 0
## X name
## 0 0
## ticking Accurate Crosses - away
## 0 2735
## Accurate Crosses - home Assists - away
## 2716 24695
## Assists - home Attacks - away
## 24693 37
## Attacks - home Ball Possession % - away
## 32 22
## Ball Possession % - home Ball Safe - away
## 21 20071
## Ball Safe - home Challenges - away
## 20067 4197
## Challenges - home Corners - away
## 4192 69
## Corners - home Counter Attacks - away
## 62 34253
## Counter Attacks - home Dangerous Attacks - away
## 34251 50
## Dangerous Attacks - home Dribble Attempts - away
## 45 7031
## Dribble Attempts - home Fouls - away
## 7007 1459
## Fouls - home Free Kicks - away
## 1448 48159
## Free Kicks - home Goal Attempts - away
## 48159 23691
## Goal Attempts - home Goal Kicks - away
## 23684 4138
## Goal Kicks - home Goals - away
## 4131 37
## Goals - home Headers - away
## 36 9281
## Headers - home Hit Woodwork - away
## 9275 185
## Hit Woodwork - home Injuries - away
## 165 39045
## Injuries - home Interceptions - away
## 39044 3811
## Interceptions - home Key Passes - away
## 3791 4883
## Key Passes - home Long Passes - away
## 4880 4867
## Long Passes - home Offsides - away
## 4857 15828
## Offsides - home Passes - away
## 15823 275
## Passes - home Penalties - away
## 261 73
## Penalties - home Redcards - away
## 66 59
## Redcards - home Saves - away
## 51 9445
## Saves - home Score Change - away
## 9435 0
## Score Change - home Shots Blocked - away
## 0 212
## Shots Blocked - home Shots Insidebox - away
## 201 243
## Shots Insidebox - home Shots Off Target - away
## 225 33
## Shots Off Target - home Shots On Target - away
## 28 32
## Shots On Target - home Shots Outsidebox - away
## 28 247
## Shots Outsidebox - home Shots Total - away
## 226 0
## Shots Total - home Substitutions - away
## 0 78
## Substitutions - home Successful Dribbles - away
## 69 4790
## Successful Dribbles - home Successful Headers - away
## 4775 9297
## Successful Headers - home Successful Interceptions - away
## 9287 2399
## Successful Interceptions - home Successful Passes - away
## 2394 362
## Successful Passes - home Successful Passes Percentage - away
## 365 143
## Successful Passes Percentage - home Tackles - away
## 140 1761
## Tackles - home Throwins - away
## 1750 1017
## Throwins - home Total Crosses - away
## 1010 2009
## Total Crosses - home Yellowcards - away
## 1998 74
## Yellowcards - home Yellowred Cards - away
## 64 19926
## Yellowred Cards - home current_state
## 19919 37
## final_score result
## 0 0
## Total_odds P_home
## 0 0
## P_away P_draw
## 0 0
## P_home_minus_P_away total_prob
## 0 0
## P_home_norm P_draw_norm
## 0 0
## P_away_norm
## 0
# Replace missing values with a zero of the matching type:
# 0 in numeric columns and "0" in character columns.
match_data_special <- mutate(
  match_data_special,
  across(where(is.numeric), function(col) replace(col, is.na(col), 0)),
  across(where(is.character), function(col) replace(col, is.na(col), "0"))
)
# Count the missing values per column again and print the counts.
na_after_fill <- colSums(is.na(match_data_special))
cat("NA sayısı (0 ile doldurulduktan sonra):\n")
## NA sayısı (0 ile doldurulduktan sonra):
print(na_after_fill)
## fixture_id halftime
## 0 0
## current_time half_start_datetime
## 0 0
## match_start_datetime minute
## 0 0
## second latest_bookmaker_update
## 0 0
## suspended stopped
## 0 0
## 1 2
## 0 0
## X name
## 0 0
## ticking Accurate Crosses - away
## 0 0
## Accurate Crosses - home Assists - away
## 0 0
## Assists - home Attacks - away
## 0 0
## Attacks - home Ball Possession % - away
## 0 0
## Ball Possession % - home Ball Safe - away
## 0 0
## Ball Safe - home Challenges - away
## 0 0
## Challenges - home Corners - away
## 0 0
## Corners - home Counter Attacks - away
## 0 0
## Counter Attacks - home Dangerous Attacks - away
## 0 0
## Dangerous Attacks - home Dribble Attempts - away
## 0 0
## Dribble Attempts - home Fouls - away
## 0 0
## Fouls - home Free Kicks - away
## 0 0
## Free Kicks - home Goal Attempts - away
## 0 0
## Goal Attempts - home Goal Kicks - away
## 0 0
## Goal Kicks - home Goals - away
## 0 0
## Goals - home Headers - away
## 0 0
## Headers - home Hit Woodwork - away
## 0 0
## Hit Woodwork - home Injuries - away
## 0 0
## Injuries - home Interceptions - away
## 0 0
## Interceptions - home Key Passes - away
## 0 0
## Key Passes - home Long Passes - away
## 0 0
## Long Passes - home Offsides - away
## 0 0
## Offsides - home Passes - away
## 0 0
## Passes - home Penalties - away
## 0 0
## Penalties - home Redcards - away
## 0 0
## Redcards - home Saves - away
## 0 0
## Saves - home Score Change - away
## 0 0
## Score Change - home Shots Blocked - away
## 0 0
## Shots Blocked - home Shots Insidebox - away
## 0 0
## Shots Insidebox - home Shots Off Target - away
## 0 0
## Shots Off Target - home Shots On Target - away
## 0 0
## Shots On Target - home Shots Outsidebox - away
## 0 0
## Shots Outsidebox - home Shots Total - away
## 0 0
## Shots Total - home Substitutions - away
## 0 0
## Substitutions - home Successful Dribbles - away
## 0 0
## Successful Dribbles - home Successful Headers - away
## 0 0
## Successful Headers - home Successful Interceptions - away
## 0 0
## Successful Interceptions - home Successful Passes - away
## 0 0
## Successful Passes - home Successful Passes Percentage - away
## 0 0
## Successful Passes Percentage - home Tackles - away
## 0 0
## Tackles - home Throwins - away
## 0 0
## Throwins - home Total Crosses - away
## 0 0
## Total Crosses - home Yellowcards - away
## 0 0
## Yellowcards - home Yellowred Cards - away
## 0 0
## Yellowred Cards - home current_state
## 0 0
## final_score result
## 0 0
## Total_odds P_home
## 0 0
## P_away P_draw
## 0 0
## P_home_minus_P_away total_prob
## 0 0
## P_home_norm P_draw_norm
## 0 0
## P_away_norm
## 0
# Pairwise correlations between the numeric first-half variables; keep only
# the pairs with |r| > 0.85.
# The first-half rows are taken from `match_data_special` rather than from
# `first_half_special`: the latter was split off before the forward fill and
# zero fill above, so it still contains the NAs, and `use = "complete.obs"`
# would then discard every row with a missing value in any column.
numeric_data <- match_data_special %>%
  filter(halftime == "1st-half") %>%
  select(where(is.numeric))
correlation_matrix <- cor(numeric_data, use = "complete.obs")
## Warning in cor(numeric_data, use = "complete.obs"): the standard deviation is
## zero
# Long format: one row per (Var1, Var2) pair with the correlation in `Freq`.
correlation_long <- as.data.frame(as.table(correlation_matrix))
# Drop the diagonal, the undefined correlations (constant columns give NA) and
# everything at or below the 0.85 threshold. Each unordered pair still appears
# twice, once as (A, B) and once as (B, A).
is_strong_pair <- correlation_long$Var1 != correlation_long$Var2 &
  !is.na(correlation_long$Freq) &
  abs(correlation_long$Freq) > 0.85
correlation_long <- correlation_long[which(is_strong_pair), ]
correlation_long
## Var1 Var2
## 5 2 fixture_id
## 33 Goal Kicks - away fixture_id
## 42 Injuries - home fixture_id
## 54 Penalties - home fixture_id
## 96 P_home fixture_id
## 101 P_home_norm fixture_id
## 225 Challenges - away second
## 251 Interceptions - away second
## 409 P_away 1
## 415 P_away_norm 1
## 417 fixture_id 2
## 422 X 2
## 458 Injuries - home 2
## 470 Penalties - home 2
## 525 2 X
## 562 Injuries - home X
## 574 Penalties - home X
## 785 Saves - away Accurate Crosses - home
## 792 Shots Insidebox - home Accurate Crosses - home
## 796 Shots On Target - home Accurate Crosses - home
## 819 Yellowcards - away Accurate Crosses - home
## 1053 Ball Possession % - away Attacks - away
## 1054 Ball Possession % - home Attacks - away
## 1062 Counter Attacks - home Attacks - away
## 1063 Dangerous Attacks - away Attacks - away
## 1071 Goal Attempts - away Attacks - away
## 1074 Goal Kicks - home Attacks - away
## 1085 Key Passes - away Attacks - away
## 1091 Passes - away Attacks - away
## 1103 Shots Insidebox - away Attacks - away
## 1105 Shots Off Target - away Attacks - away
## 1111 Shots Total - away Attacks - away
## 1113 Substitutions - away Attacks - away
## 1121 Successful Passes - away Attacks - away
## 1123 Successful Passes Percentage - away Attacks - away
## 1124 Successful Passes Percentage - home Attacks - away
## 1129 Total Crosses - away Attacks - away
## 1157 Ball Possession % - away Attacks - home
## 1158 Ball Possession % - home Attacks - home
## 1167 Dangerous Attacks - away Attacks - home
## 1168 Dangerous Attacks - home Attacks - home
## 1178 Goal Kicks - home Attacks - home
## 1196 Passes - home Attacks - home
## 1215 Shots Total - away Attacks - home
## 1226 Successful Passes - home Attacks - home
## 1228 Successful Passes Percentage - home Attacks - home
## 1234 Total Crosses - home Attacks - home
## 1259 Attacks - away Ball Possession % - away
## 1260 Attacks - home Ball Possession % - away
## 1262 Ball Possession % - home Ball Possession % - away
## 1267 Corners - away Ball Possession % - away
## 1268 Corners - home Ball Possession % - away
## 1270 Counter Attacks - home Ball Possession % - away
## 1271 Dangerous Attacks - away Ball Possession % - away
## 1282 Goal Kicks - home Ball Possession % - away
## 1293 Key Passes - away Ball Possession % - away
## 1299 Passes - away Ball Possession % - away
## 1311 Shots Insidebox - away Ball Possession % - away
## 1313 Shots Off Target - away Ball Possession % - away
## 1319 Shots Total - away Ball Possession % - away
## 1321 Substitutions - away Ball Possession % - away
## 1329 Successful Passes - away Ball Possession % - away
## 1330 Successful Passes - home Ball Possession % - away
## 1331 Successful Passes Percentage - away Ball Possession % - away
## 1332 Successful Passes Percentage - home Ball Possession % - away
## 1337 Total Crosses - away Ball Possession % - away
## 1338 Total Crosses - home Ball Possession % - away
## 1363 Attacks - away Ball Possession % - home
## 1364 Attacks - home Ball Possession % - home
## 1365 Ball Possession % - away Ball Possession % - home
## 1371 Corners - away Ball Possession % - home
## 1372 Corners - home Ball Possession % - home
## 1374 Counter Attacks - home Ball Possession % - home
## 1375 Dangerous Attacks - away Ball Possession % - home
## 1386 Goal Kicks - home Ball Possession % - home
## 1397 Key Passes - away Ball Possession % - home
## 1403 Passes - away Ball Possession % - home
## 1415 Shots Insidebox - away Ball Possession % - home
## 1417 Shots Off Target - away Ball Possession % - home
## 1423 Shots Total - away Ball Possession % - home
## 1425 Substitutions - away Ball Possession % - home
## 1433 Successful Passes - away Ball Possession % - home
## 1434 Successful Passes - home Ball Possession % - home
## 1435 Successful Passes Percentage - away Ball Possession % - home
## 1436 Successful Passes Percentage - home Ball Possession % - home
## 1441 Total Crosses - away Ball Possession % - home
## 1442 Total Crosses - home Ball Possession % - home
## 1474 Challenges - home Ball Safe - away
## 1500 Interceptions - home Ball Safe - away
## 1601 Injuries - away Ball Safe - home
## 1612 Passes - home Ball Safe - home
## 1636 Successful Dribbles - home Ball Safe - home
## 1642 Successful Passes - home Ball Safe - home
## 1667 second Challenges - away
## 1707 Interceptions - away Challenges - away
## 1752 Throwins - home Challenges - away
## 1783 Ball Safe - away Challenges - home
## 1811 Interceptions - away Challenges - home
## 1812 Interceptions - home Challenges - home
## 1885 Ball Possession % - away Corners - away
## 1886 Ball Possession % - home Corners - away
## 1892 Corners - home Corners - away
## 1907 Goals - away Corners - away
## 1922 Offsides - home Corners - away
## 1923 Passes - away Corners - away
## 1930 Saves - home Corners - away
## 1953 Successful Passes - away Corners - away
## 1956 Successful Passes Percentage - home Corners - away
## 1961 Total Crosses - away Corners - away
## 1962 Total Crosses - home Corners - away
## 1989 Ball Possession % - away Corners - home
## 1990 Ball Possession % - home Corners - home
## 1995 Corners - away Corners - home
## 2011 Goals - away Corners - home
## 2026 Offsides - home Corners - home
## 2027 Passes - away Corners - home
## 2034 Saves - home Corners - home
## 2040 Shots Insidebox - home Corners - home
## 2049 Substitutions - away Corners - home
## 2057 Successful Passes - away Corners - home
## 2059 Successful Passes Percentage - away Corners - home
## 2060 Successful Passes Percentage - home Corners - home
## 2065 Total Crosses - away Corners - home
## 2066 Total Crosses - home Corners - home
## 2195 Attacks - away Counter Attacks - home
## 2197 Ball Possession % - away Counter Attacks - home
## 2198 Ball Possession % - home Counter Attacks - home
## 2207 Dangerous Attacks - away Counter Attacks - home
## 2218 Goal Kicks - home Counter Attacks - home
## 2229 Key Passes - away Counter Attacks - home
## 2235 Passes - away Counter Attacks - home
## 2246 Shots Blocked - home Counter Attacks - home
## 2247 Shots Insidebox - away Counter Attacks - home
## 2249 Shots Off Target - away Counter Attacks - home
## 2255 Shots Total - away Counter Attacks - home
## 2259 Successful Dribbles - away Counter Attacks - home
## 2265 Successful Passes - away Counter Attacks - home
## 2267 Successful Passes Percentage - away Counter Attacks - home
## 2268 Successful Passes Percentage - home Counter Attacks - home
## 2299 Attacks - away Dangerous Attacks - away
## 2300 Attacks - home Dangerous Attacks - away
## 2301 Ball Possession % - away Dangerous Attacks - away
## 2302 Ball Possession % - home Dangerous Attacks - away
## 2310 Counter Attacks - home Dangerous Attacks - away
## 2319 Goal Attempts - away Dangerous Attacks - away
## 2322 Goal Kicks - home Dangerous Attacks - away
## 2333 Key Passes - away Dangerous Attacks - away
## 2339 Passes - away Dangerous Attacks - away
## 2349 Shots Blocked - away Dangerous Attacks - away
## 2351 Shots Insidebox - away Dangerous Attacks - away
## 2353 Shots Off Target - away Dangerous Attacks - away
## 2359 Shots Total - away Dangerous Attacks - away
## 2363 Successful Dribbles - away Dangerous Attacks - away
## 2369 Successful Passes - away Dangerous Attacks - away
## 2371 Successful Passes Percentage - away Dangerous Attacks - away
## 2372 Successful Passes Percentage - home Dangerous Attacks - away
## 2377 Total Crosses - away Dangerous Attacks - away
## 2404 Attacks - home Dangerous Attacks - home
## 2425 Goal Kicks - away Dangerous Attacks - home
## 2444 Passes - home Dangerous Attacks - home
## 2474 Successful Passes - home Dangerous Attacks - home
## 2476 Successful Passes Percentage - home Dangerous Attacks - home
## 2676 Successful Dribbles - home Dribble Attempts - home
## 2750 Key Passes - home Fouls - away
## 2768 Shots Insidebox - home Fouls - away
## 2772 Shots On Target - home Fouls - away
## 2776 Shots Total - home Fouls - away
## 2777 Substitutions - away Fouls - away
## 2784 Successful Interceptions - home Fouls - away
## 2837 Free Kicks - away Fouls - home
## 2861 Penalties - away Fouls - home
## 2940 Fouls - home Free Kicks - away
## 2965 Penalties - away Free Kicks - away
## 3054 Headers - home Free Kicks - home
## 3078 Shots Blocked - home Free Kicks - home
## 3094 Successful Headers - home Free Kicks - home
## 3131 Attacks - away Goal Attempts - away
## 3143 Dangerous Attacks - away Goal Attempts - away
## 3154 Goal Kicks - home Goal Attempts - away
## 3165 Key Passes - away Goal Attempts - away
## 3181 Shots Blocked - away Goal Attempts - away
## 3189 Shots Outsidebox - away Goal Attempts - away
## 3191 Shots Total - away Goal Attempts - away
## 3204 Successful Passes Percentage - home Goal Attempts - away
## 3209 Total Crosses - away Goal Attempts - away
## 3329 fixture_id Goal Kicks - away
## 3352 Dangerous Attacks - home Goal Kicks - away
## 3394 Shots Off Target - home Goal Kicks - away
## 3424 P_home Goal Kicks - away
## 3429 P_home_norm Goal Kicks - away
## 3443 Attacks - away Goal Kicks - home
## 3444 Attacks - home Goal Kicks - home
## 3445 Ball Possession % - away Goal Kicks - home
## 3446 Ball Possession % - home Goal Kicks - home
## 3454 Counter Attacks - home Goal Kicks - home
## 3455 Dangerous Attacks - away Goal Kicks - home
## 3463 Goal Attempts - away Goal Kicks - home
## 3477 Key Passes - away Goal Kicks - home
## 3483 Passes - away Goal Kicks - home
## 3493 Shots Blocked - away Goal Kicks - home
## 3495 Shots Insidebox - away Goal Kicks - home
## 3497 Shots Off Target - away Goal Kicks - home
## 3503 Shots Total - away Goal Kicks - home
## 3507 Successful Dribbles - away Goal Kicks - home
## 3513 Successful Passes - away Goal Kicks - home
## 3515 Successful Passes Percentage - away Goal Kicks - home
## 3516 Successful Passes Percentage - home Goal Kicks - home
## 3521 Total Crosses - away Goal Kicks - home
## 3522 Total Crosses - home Goal Kicks - home
## 3555 Corners - away Goals - away
## 3556 Corners - home Goals - away
## 3594 Saves - home Goals - away
## 3878 Free Kicks - home Headers - home
## 3895 Long Passes - away Headers - home
## 3926 Successful Headers - home Headers - home
## 4176 Ball Safe - home Injuries - away
## 4212 Passes - home Injuries - away
## 4236 Successful Dribbles - home Injuries - away
## 4265 fixture_id Injuries - home
## 4269 2 Injuries - home
## 4270 X Injuries - home
## 4318 Penalties - home Injuries - home
## 4371 second Interceptions - away
## 4385 Challenges - away Interceptions - away
## 4386 Challenges - home Interceptions - away
## 4456 Throwins - home Interceptions - away
## 4487 Ball Safe - away Interceptions - home
## 4490 Challenges - home Interceptions - home
## 4587 Attacks - away Key Passes - away
## 4589 Ball Possession % - away Key Passes - away
## 4590 Ball Possession % - home Key Passes - away
## 4598 Counter Attacks - home Key Passes - away
## 4599 Dangerous Attacks - away Key Passes - away
## 4607 Goal Attempts - away Key Passes - away
## 4610 Goal Kicks - home Key Passes - away
## 4638 Shots Blocked - home Key Passes - away
## 4639 Shots Insidebox - away Key Passes - away
## 4641 Shots Off Target - away Key Passes - away
## 4647 Shots Total - away Key Passes - away
## 4651 Successful Dribbles - away Key Passes - away
## 4657 Successful Passes - away Key Passes - away
## 4659 Successful Passes Percentage - away Key Passes - away
## 4660 Successful Passes Percentage - home Key Passes - away
## 4707 Fouls - away Key Passes - home
## 4737 Saves - away Key Passes - home
## 4744 Shots Insidebox - home Key Passes - home
## 4748 Shots On Target - home Key Passes - home
## 4752 Shots Total - home Key Passes - home
## 4753 Substitutions - away Key Passes - home
## 4760 Successful Interceptions - home Key Passes - home
## 4772 Yellowcards - home Key Passes - home
## 4822 Headers - home Long Passes - away
## 4862 Successful Headers - home Long Passes - away
## 4958 Shots Outsidebox - home Long Passes - home
## 4973 Tackles - away Long Passes - home
## 4984 P_home Long Passes - home
## 4985 P_away Long Passes - home
## 4987 P_home_minus_P_away Long Passes - home
## 4989 P_home_norm Long Passes - home
## 4991 P_away_norm Long Passes - home
## 5115 Corners - away Offsides - home
## 5116 Corners - home Offsides - home
## 5154 Saves - home Offsides - home
## 5185 Total Crosses - away Offsides - home
## 5211 Attacks - away Passes - away
## 5213 Ball Possession % - away Passes - away
## 5214 Ball Possession % - home Passes - away
## 5219 Corners - away Passes - away
## 5220 Corners - home Passes - away
## 5222 Counter Attacks - home Passes - away
## 5223 Dangerous Attacks - away Passes - away
## 5234 Goal Kicks - home Passes - away
## 5263 Shots Insidebox - away Passes - away
## 5265 Shots Off Target - away Passes - away
## 5273 Substitutions - away Passes - away
## 5281 Successful Passes - away Passes - away
## 5283 Successful Passes Percentage - away Passes - away
## 5284 Successful Passes Percentage - home Passes - away
## 5289 Total Crosses - away Passes - away
## 5290 Total Crosses - home Passes - away
## 5316 Attacks - home Passes - home
## 5320 Ball Safe - home Passes - home
## 5328 Dangerous Attacks - home Passes - home
## 5345 Injuries - away Passes - home
## 5365 Shots Blocked - away Passes - home
## 5386 Successful Passes - home Passes - home
## 5388 Successful Passes Percentage - home Passes - home
## 5394 Total Crosses - home Passes - home
## 5436 Fouls - home Penalties - away
## 5437 Free Kicks - away Penalties - away
## 5513 fixture_id Penalties - home
## 5517 2 Penalties - home
## 5518 X Penalties - home
## 5554 Injuries - home Penalties - home
## 5832 Accurate Crosses - home Saves - away
## 5870 Key Passes - home Saves - away
## 5888 Shots Insidebox - home Saves - away
## 5892 Shots On Target - home Saves - away
## 5896 Shots Total - home Saves - away
## 5897 Substitutions - away Saves - away
## 5904 Successful Interceptions - home Saves - away
## 5907 Successful Passes Percentage - away Saves - away
## 5915 Yellowcards - away Saves - away
## 5947 Corners - away Saves - home
## 5948 Corners - home Saves - home
## 5963 Goals - away Saves - home
## 5978 Offsides - home Saves - home
## 6263 Dangerous Attacks - away Shots Blocked - away
## 6271 Goal Attempts - away Shots Blocked - away
## 6274 Goal Kicks - home Shots Blocked - away
## 6292 Passes - home Shots Blocked - away
## 6309 Shots Outsidebox - away Shots Blocked - away
## 6311 Shots Total - away Shots Blocked - away
## 6315 Successful Dribbles - away Shots Blocked - away
## 6322 Successful Passes - home Shots Blocked - away
## 6324 Successful Passes Percentage - home Shots Blocked - away
## 6329 Total Crosses - away Shots Blocked - away
## 6366 Counter Attacks - home Shots Blocked - home
## 6374 Free Kicks - home Shots Blocked - home
## 6389 Key Passes - away Shots Blocked - home
## 6407 Shots Insidebox - away Shots Blocked - home
## 6409 Shots Off Target - away Shots Blocked - home
## 6417 Substitutions - away Shots Blocked - home
## 6427 Successful Passes Percentage - away Shots Blocked - home
## 6436 Yellowcards - home Shots Blocked - home
## 6459 Attacks - away Shots Insidebox - away
## 6461 Ball Possession % - away Shots Insidebox - away
## 6462 Ball Possession % - home Shots Insidebox - away
## 6470 Counter Attacks - home Shots Insidebox - away
## 6471 Dangerous Attacks - away Shots Insidebox - away
## 6482 Goal Kicks - home Shots Insidebox - away
## 6493 Key Passes - away Shots Insidebox - away
## 6499 Passes - away Shots Insidebox - away
## 6510 Shots Blocked - home Shots Insidebox - away
## 6513 Shots Off Target - away Shots Insidebox - away
## 6519 Shots Total - away Shots Insidebox - away
## 6521 Substitutions - away Shots Insidebox - away
## 6523 Successful Dribbles - away Shots Insidebox - away
## 6529 Successful Passes - away Shots Insidebox - away
## 6531 Successful Passes Percentage - away Shots Insidebox - away
## 6532 Successful Passes Percentage - home Shots Insidebox - away
## 6537 Total Crosses - away Shots Insidebox - away
## 6540 Yellowcards - home Shots Insidebox - away
## 6560 Accurate Crosses - home Shots Insidebox - home
## 6572 Corners - home Shots Insidebox - home
## 6579 Fouls - away Shots Insidebox - home
## 6598 Key Passes - home Shots Insidebox - home
## 6609 Saves - away Shots Insidebox - home
## 6620 Shots On Target - home Shots Insidebox - home
## 6624 Shots Total - home Shots Insidebox - home
## 6625 Substitutions - away Shots Insidebox - home
## 6632 Successful Interceptions - home Shots Insidebox - home
## 6667 Attacks - away Shots Off Target - away
## 6669 Ball Possession % - away Shots Off Target - away
## 6670 Ball Possession % - home Shots Off Target - away
## 6678 Counter Attacks - home Shots Off Target - away
## 6679 Dangerous Attacks - away Shots Off Target - away
## 6690 Goal Kicks - home Shots Off Target - away
## 6701 Key Passes - away Shots Off Target - away
## 6707 Passes - away Shots Off Target - away
## 6718 Shots Blocked - home Shots Off Target - away
## 6719 Shots Insidebox - away Shots Off Target - away
## 6727 Shots Total - away Shots Off Target - away
## 6731 Successful Dribbles - away Shots Off Target - away
## 6737 Successful Passes - away Shots Off Target - away
## 6739 Successful Passes Percentage - away Shots Off Target - away
## 6793 Goal Kicks - away Shots Off Target - home
## 6976 Accurate Crosses - home Shots On Target - home
## 6995 Fouls - away Shots On Target - home
## 7014 Key Passes - home Shots On Target - home
## 7025 Saves - away Shots On Target - home
## 7032 Shots Insidebox - home Shots On Target - home
## 7040 Shots Total - home Shots On Target - home
## 7041 Substitutions - away Shots On Target - home
## 7048 Successful Interceptions - home Shots On Target - home
## 7103 Goal Attempts - away Shots Outsidebox - away
## 7133 Shots Blocked - away Shots Outsidebox - away
## 7224 Long Passes - home Shots Outsidebox - home
## 7291 Attacks - away Shots Total - away
## 7292 Attacks - home Shots Total - away
## 7293 Ball Possession % - away Shots Total - away
## 7294 Ball Possession % - home Shots Total - away
## 7302 Counter Attacks - home Shots Total - away
## 7303 Dangerous Attacks - away Shots Total - away
## 7311 Goal Attempts - away Shots Total - away
## 7314 Goal Kicks - home Shots Total - away
## 7325 Key Passes - away Shots Total - away
## 7341 Shots Blocked - away Shots Total - away
## 7343 Shots Insidebox - away Shots Total - away
## 7345 Shots Off Target - away Shots Total - away
## 7355 Successful Dribbles - away Shots Total - away
## 7361 Successful Passes - away Shots Total - away
## 7364 Successful Passes Percentage - home Shots Total - away
## 7369 Total Crosses - away Shots Total - away
## 7411 Fouls - away Shots Total - home
## 7430 Key Passes - home Shots Total - home
## 7441 Saves - away Shots Total - home
## 7448 Shots Insidebox - home Shots Total - home
## 7452 Shots On Target - home Shots Total - home
## 7457 Substitutions - away Shots Total - home
## 7464 Successful Interceptions - home Shots Total - home
## 7467 Successful Passes Percentage - away Shots Total - home
## 7476 Yellowcards - home Shots Total - home
## 7499 Attacks - away Substitutions - away
## 7501 Ball Possession % - away Substitutions - away
## 7502 Ball Possession % - home Substitutions - away
## 7508 Corners - home Substitutions - away
## 7515 Fouls - away Substitutions - away
## 7534 Key Passes - home Substitutions - away
## 7539 Passes - away Substitutions - away
## 7545 Saves - away Substitutions - away
## 7550 Shots Blocked - home Substitutions - away
## 7551 Shots Insidebox - away Substitutions - away
## 7552 Shots Insidebox - home Substitutions - away
## 7556 Shots On Target - home Substitutions - away
## 7560 Shots Total - home Substitutions - away
## 7569 Successful Passes - away Substitutions - away
## 7571 Successful Passes Percentage - away Substitutions - away
## 7572 Successful Passes Percentage - home Substitutions - away
## 7578 Total Crosses - home Substitutions - away
## 7579 Yellowcards - away Substitutions - away
## 7580 Yellowcards - home Substitutions - away
## 7718 Counter Attacks - home Successful Dribbles - away
## 7719 Dangerous Attacks - away Successful Dribbles - away
## 7730 Goal Kicks - home Successful Dribbles - away
## 7741 Key Passes - away Successful Dribbles - away
## 7757 Shots Blocked - away Successful Dribbles - away
## 7759 Shots Insidebox - away Successful Dribbles - away
## 7761 Shots Off Target - away Successful Dribbles - away
## 7767 Shots Total - away Successful Dribbles - away
## 7780 Successful Passes Percentage - home Successful Dribbles - away
## 7816 Ball Safe - home Successful Dribbles - home
## 7826 Dribble Attempts - home Successful Dribbles - home
## 7841 Injuries - away Successful Dribbles - home
## 8038 Free Kicks - home Successful Headers - home
## 8046 Headers - home Successful Headers - home
## 8055 Long Passes - away Successful Headers - home
## 8197 Tackles - away Successful Interceptions - away
## 8243 Fouls - away Successful Interceptions - home
## 8262 Key Passes - home Successful Interceptions - home
## 8273 Saves - away Successful Interceptions - home
## 8280 Shots Insidebox - home Successful Interceptions - home
## 8284 Shots On Target - home Successful Interceptions - home
## 8288 Shots Total - home Successful Interceptions - home
## 8331 Attacks - away Successful Passes - away
## 8333 Ball Possession % - away Successful Passes - away
## 8334 Ball Possession % - home Successful Passes - away
## 8339 Corners - away Successful Passes - away
## 8340 Corners - home Successful Passes - away
## 8342 Counter Attacks - home Successful Passes - away
## 8343 Dangerous Attacks - away Successful Passes - away
## 8354 Goal Kicks - home Successful Passes - away
## 8365 Key Passes - away Successful Passes - away
## 8371 Passes - away Successful Passes - away
## 8383 Shots Insidebox - away Successful Passes - away
## 8385 Shots Off Target - away Successful Passes - away
## 8391 Shots Total - away Successful Passes - away
## 8393 Substitutions - away Successful Passes - away
## 8403 Successful Passes Percentage - away Successful Passes - away
## 8404 Successful Passes Percentage - home Successful Passes - away
## 8409 Total Crosses - away Successful Passes - away
## 8410 Total Crosses - home Successful Passes - away
## 8436 Attacks - home Successful Passes - home
## 8437 Ball Possession % - away Successful Passes - home
## 8438 Ball Possession % - home Successful Passes - home
## 8440 Ball Safe - home Successful Passes - home
## 8448 Dangerous Attacks - home Successful Passes - home
## 8476 Passes - home Successful Passes - home
## 8485 Shots Blocked - away Successful Passes - home
## 8508 Successful Passes Percentage - home Successful Passes - home
## 8513 Total Crosses - away Successful Passes - home
## 8514 Total Crosses - home Successful Passes - home
## 8539 Attacks - away Successful Passes Percentage - away
## 8541 Ball Possession % - away Successful Passes Percentage - away
## 8542 Ball Possession % - home Successful Passes Percentage - away
## 8548 Corners - home Successful Passes Percentage - away
## 8550 Counter Attacks - home Successful Passes Percentage - away
## 8551 Dangerous Attacks - away Successful Passes Percentage - away
## 8562 Goal Kicks - home Successful Passes Percentage - away
## 8573 Key Passes - away Successful Passes Percentage - away
## 8579 Passes - away Successful Passes Percentage - away
## 8585 Saves - away Successful Passes Percentage - away
## 8590 Shots Blocked - home Successful Passes Percentage - away
## 8591 Shots Insidebox - away Successful Passes Percentage - away
## 8593 Shots Off Target - away Successful Passes Percentage - away
## 8600 Shots Total - home Successful Passes Percentage - away
## 8601 Substitutions - away Successful Passes Percentage - away
## 8609 Successful Passes - away Successful Passes Percentage - away
## 8612 Successful Passes Percentage - home Successful Passes Percentage - away
## 8618 Total Crosses - home Successful Passes Percentage - away
## 8619 Yellowcards - away Successful Passes Percentage - away
## 8620 Yellowcards - home Successful Passes Percentage - away
## 8643 Attacks - away Successful Passes Percentage - home
## 8644 Attacks - home Successful Passes Percentage - home
## 8645 Ball Possession % - away Successful Passes Percentage - home
## 8646 Ball Possession % - home Successful Passes Percentage - home
## 8651 Corners - away Successful Passes Percentage - home
## 8652 Corners - home Successful Passes Percentage - home
## 8654 Counter Attacks - home Successful Passes Percentage - home
## 8655 Dangerous Attacks - away Successful Passes Percentage - home
## 8656 Dangerous Attacks - home Successful Passes Percentage - home
## 8663 Goal Attempts - away Successful Passes Percentage - home
## 8666 Goal Kicks - home Successful Passes Percentage - home
## 8677 Key Passes - away Successful Passes Percentage - home
## 8683 Passes - away Successful Passes Percentage - home
## 8684 Passes - home Successful Passes Percentage - home
## 8693 Shots Blocked - away Successful Passes Percentage - home
## 8695 Shots Insidebox - away Successful Passes Percentage - home
## 8703 Shots Total - away Successful Passes Percentage - home
## 8705 Substitutions - away Successful Passes Percentage - home
## 8707 Successful Dribbles - away Successful Passes Percentage - home
## 8713 Successful Passes - away Successful Passes Percentage - home
## 8714 Successful Passes - home Successful Passes Percentage - home
## 8715 Successful Passes Percentage - away Successful Passes Percentage - home
## 8721 Total Crosses - away Successful Passes Percentage - home
## 8722 Total Crosses - home Successful Passes Percentage - home
## 8784 Long Passes - home Tackles - away
## 8815 Successful Interceptions - away Tackles - away
## 8833 P_away Tackles - away
## 8839 P_away_norm Tackles - away
## 9065 Challenges - away Throwins - home
## 9091 Interceptions - away Throwins - home
## 9163 Attacks - away Total Crosses - away
## 9165 Ball Possession % - away Total Crosses - away
## 9166 Ball Possession % - home Total Crosses - away
## 9171 Corners - away Total Crosses - away
## 9172 Corners - home Total Crosses - away
## 9175 Dangerous Attacks - away Total Crosses - away
## 9183 Goal Attempts - away Total Crosses - away
## 9186 Goal Kicks - home Total Crosses - away
## 9202 Offsides - home Total Crosses - away
## 9203 Passes - away Total Crosses - away
## 9213 Shots Blocked - away Total Crosses - away
## 9215 Shots Insidebox - away Total Crosses - away
## 9223 Shots Total - away Total Crosses - away
## 9233 Successful Passes - away Total Crosses - away
## 9234 Successful Passes - home Total Crosses - away
## 9236 Successful Passes Percentage - home Total Crosses - away
## 9242 Total Crosses - home Total Crosses - away
## 9268 Attacks - home Total Crosses - home
## 9269 Ball Possession % - away Total Crosses - home
## 9270 Ball Possession % - home Total Crosses - home
## 9275 Corners - away Total Crosses - home
## 9276 Corners - home Total Crosses - home
## 9290 Goal Kicks - home Total Crosses - home
## 9307 Passes - away Total Crosses - home
## 9308 Passes - home Total Crosses - home
## 9329 Substitutions - away Total Crosses - home
## 9337 Successful Passes - away Total Crosses - home
## 9338 Successful Passes - home Total Crosses - home
## 9339 Successful Passes Percentage - away Total Crosses - home
## 9340 Successful Passes Percentage - home Total Crosses - home
## 9345 Total Crosses - away Total Crosses - home
## 9368 Accurate Crosses - home Yellowcards - away
## 9417 Saves - away Yellowcards - away
## 9433 Substitutions - away Yellowcards - away
## 9443 Successful Passes Percentage - away Yellowcards - away
## 9510 Key Passes - home Yellowcards - home
## 9526 Shots Blocked - home Yellowcards - home
## 9527 Shots Insidebox - away Yellowcards - home
## 9536 Shots Total - home Yellowcards - home
## 9537 Substitutions - away Yellowcards - home
## 9547 Successful Passes Percentage - away Yellowcards - home
## 9881 fixture_id P_home
## 9913 Goal Kicks - away P_home
## 9928 Long Passes - home P_home
## 9977 P_away P_home
## 9979 P_home_minus_P_away P_home
## 9981 P_home_norm P_home
## 9983 P_away_norm P_home
## 9988 1 P_away
## 10032 Long Passes - home P_away
## 10069 Tackles - away P_away
## 10080 P_home P_away
## 10083 P_home_minus_P_away P_away
## 10085 P_home_norm P_away
## 10087 P_away_norm P_away
## 10190 P_draw_norm P_draw
## 10192 predicted_draw P_draw
## 10240 Long Passes - home P_home_minus_P_away
## 10288 P_home P_home_minus_P_away
## 10289 P_away P_home_minus_P_away
## 10293 P_home_norm P_home_minus_P_away
## 10295 P_away_norm P_home_minus_P_away
## 10401 fixture_id P_home_norm
## 10433 Goal Kicks - away P_home_norm
## 10448 Long Passes - home P_home_norm
## 10496 P_home P_home_norm
## 10497 P_away P_home_norm
## 10499 P_home_minus_P_away P_home_norm
## 10503 P_away_norm P_home_norm
## 10602 P_draw P_draw_norm
## 10608 predicted_draw P_draw_norm
## 10612 1 P_away_norm
## 10656 Long Passes - home P_away_norm
## 10693 Tackles - away P_away_norm
## 10704 P_home P_away_norm
## 10705 P_away P_away_norm
## 10707 P_home_minus_P_away P_away_norm
## 10709 P_home_norm P_away_norm
## 10810 P_draw predicted_draw
## 10814 P_draw_norm predicted_draw
## Freq
## 5 0.8745462
## 33 0.8652475
## 42 -0.8657527
## 54 0.8657527
## 96 0.8916220
## 101 0.8916220
## 225 -0.9438404
## 251 -0.9345203
## 409 0.8577641
## 415 0.8577641
## 417 0.8745462
## 422 0.9046007
## 458 -0.9996599
## 470 0.9996599
## 525 0.9046007
## 562 -0.9119251
## 574 0.9119251
## 785 0.8690829
## 792 0.8594506
## 796 0.8795371
## 819 0.9105507
## 1053 0.9331640
## 1054 -0.9331640
## 1062 0.9239559
## 1063 0.9554754
## 1071 0.8590615
## 1074 0.9443209
## 1085 0.9399353
## 1091 0.9527355
## 1103 0.9485002
## 1105 0.9341039
## 1111 0.9292023
## 1113 -0.8723909
## 1121 0.9582330
## 1123 0.9113811
## 1124 -0.9307463
## 1129 0.9046146
## 1157 -0.8903791
## 1158 0.8903791
## 1167 -0.8754230
## 1168 0.9483605
## 1178 -0.8747841
## 1196 0.9529156
## 1215 -0.8553695
## 1226 0.9708343
## 1228 0.9262470
## 1234 0.9147621
## 1259 0.9331640
## 1260 -0.8903791
## 1262 -1.0000000
## 1267 0.9029211
## 1268 -0.9380456
## 1270 0.8555862
## 1271 0.9311931
## 1282 0.9457371
## 1293 0.8539070
## 1299 0.9552863
## 1311 0.9296235
## 1313 0.8573651
## 1319 0.9028521
## 1321 -0.9073333
## 1329 0.9633340
## 1330 -0.8845332
## 1331 0.9428282
## 1332 -0.9880744
## 1337 0.9361662
## 1338 -0.9532363
## 1363 -0.9331640
## 1364 0.8903791
## 1365 -1.0000000
## 1371 -0.9029211
## 1372 0.9380456
## 1374 -0.8555862
## 1375 -0.9311931
## 1386 -0.9457371
## 1397 -0.8539070
## 1403 -0.9552863
## 1415 -0.9296235
## 1417 -0.8573651
## 1423 -0.9028521
## 1425 0.9073333
## 1433 -0.9633340
## 1434 0.8845332
## 1435 -0.9428282
## 1436 0.9880744
## 1441 -0.9361662
## 1442 0.9532363
## 1474 0.8905176
## 1500 0.9165761
## 1601 0.9692010
## 1612 0.8703329
## 1636 0.8904250
## 1642 0.8528933
## 1667 -0.9438404
## 1707 0.9936542
## 1752 0.8535079
## 1783 0.8905176
## 1811 0.8535568
## 1812 0.9804921
## 1885 0.9029211
## 1886 -0.9029211
## 1892 -0.9469232
## 1907 -0.8719981
## 1922 0.9429304
## 1923 0.8669468
## 1930 0.9365492
## 1953 0.8641072
## 1956 -0.8920524
## 1961 0.9323863
## 1962 -0.8799688
## 1989 -0.9380456
## 1990 0.9380456
## 1995 -0.9469232
## 2011 0.8708211
## 2026 -0.8746266
## 2027 -0.9266830
## 2034 -0.9148891
## 2040 0.8545596
## 2049 0.8575380
## 2057 -0.9237960
## 2059 -0.8563477
## 2060 0.8985082
## 2065 -0.8640713
## 2066 0.9320170
## 2195 0.9239559
## 2197 0.8555862
## 2198 -0.8555862
## 2207 0.9073550
## 2218 0.9264997
## 2229 0.9485199
## 2235 0.8777409
## 2246 -0.8712226
## 2247 0.8880383
## 2249 0.9083823
## 2255 0.8717703
## 2259 0.8912864
## 2265 0.8912617
## 2267 0.8897374
## 2268 -0.8652567
## 2299 0.9554754
## 2300 -0.8754230
## 2301 0.9311931
## 2302 -0.9311931
## 2310 0.9073550
## 2319 0.9382655
## 2322 0.9659524
## 2333 0.9543662
## 2339 0.8776421
## 2349 0.9358448
## 2351 0.9277969
## 2353 0.8966097
## 2359 0.9874107
## 2363 0.9076898
## 2369 0.8934819
## 2371 0.8641769
## 2372 -0.9599913
## 2377 0.9499108
## 2404 0.9483605
## 2425 0.8797869
## 2444 0.9444970
## 2474 0.9400409
## 2476 0.8516399
## 2676 0.8505623
## 2750 0.9524194
## 2768 0.9204578
## 2772 0.9150332
## 2776 0.9264875
## 2777 0.8698684
## 2784 0.9641935
## 2837 0.9466428
## 2861 0.9176379
## 2940 0.9466428
## 2965 0.8845826
## 3054 0.8667377
## 3078 0.8546523
## 3094 0.8861449
## 3131 0.8590615
## 3143 0.9382655
## 3154 0.8732501
## 3165 0.8933450
## 3181 0.9130218
## 3189 0.8567980
## 3191 0.9476673
## 3204 -0.8574849
## 3209 0.8813586
## 3329 0.8652475
## 3352 0.8797869
## 3394 0.8822690
## 3424 0.8759197
## 3429 0.8759197
## 3443 0.9443209
## 3444 -0.8747841
## 3445 0.9457371
## 3446 -0.9457371
## 3454 0.9264997
## 3455 0.9659524
## 3463 0.8732501
## 3477 0.9403474
## 3483 0.9079713
## 3493 0.8895260
## 3495 0.9327395
## 3497 0.9061749
## 3503 0.9440636
## 3507 0.9511419
## 3513 0.9211390
## 3515 0.9058417
## 3516 -0.9613752
## 3521 0.9089071
## 3522 -0.8990171
## 3555 -0.8719981
## 3556 0.8708211
## 3594 -0.8545301
## 3878 0.8667377
## 3895 0.8615998
## 3926 0.9511375
## 4176 0.9692010
## 4212 0.8509430
## 4236 0.8713148
## 4265 -0.8657527
## 4269 -0.9996599
## 4270 -0.9119251
## 4318 -1.0000000
## 4371 -0.9345203
## 4385 0.9936542
## 4386 0.8535568
## 4456 0.8641738
## 4487 0.9165761
## 4490 0.9804921
## 4587 0.9399353
## 4589 0.8539070
## 4590 -0.8539070
## 4598 0.9485199
## 4599 0.9543662
## 4607 0.8933450
## 4610 0.9403474
## 4638 -0.8615917
## 4639 0.9338748
## 4641 0.9422748
## 4647 0.9429351
## 4651 0.9288129
## 4657 0.8563121
## 4659 0.8552192
## 4660 -0.8797664
## 4707 0.9524194
## 4737 0.8669535
## 4744 0.9242750
## 4748 0.9643125
## 4752 0.9644753
## 4753 0.8997230
## 4760 0.9325304
## 4772 -0.8601764
## 4822 0.8615998
## 4862 0.9057629
## 4958 0.8742613
## 4973 0.8927265
## 4984 0.9044191
## 4985 -0.9355816
## 4987 0.9411901
## 4989 0.9044191
## 4991 -0.9355816
## 5115 0.9429304
## 5116 -0.8746266
## 5154 0.9167603
## 5185 0.8632672
## 5211 0.9527355
## 5213 0.9552863
## 5214 -0.9552863
## 5219 0.8669468
## 5220 -0.9266830
## 5222 0.8777409
## 5223 0.8776421
## 5234 0.9079713
## 5263 0.9053212
## 5265 0.8838026
## 5273 -0.9376310
## 5281 0.9987161
## 5283 0.9474154
## 5284 -0.9198038
## 5289 0.8721006
## 5290 -0.8877447
## 5316 0.9529156
## 5320 0.8703329
## 5328 0.9444970
## 5345 0.8509430
## 5365 -0.8587611
## 5386 0.9932060
## 5388 0.8828843
## 5394 0.8905173
## 5436 0.9176379
## 5437 0.8845826
## 5513 0.8657527
## 5517 0.9996599
## 5518 0.9119251
## 5554 -1.0000000
## 5832 0.8690829
## 5870 0.8669535
## 5888 0.9036400
## 5892 0.9301725
## 5896 0.8739967
## 5897 0.8591751
## 5904 0.8640974
## 5907 -0.8742453
## 5915 0.8531051
## 5947 0.9365492
## 5948 -0.9148891
## 5963 -0.8545301
## 5978 0.9167603
## 6263 0.9358448
## 6271 0.9130218
## 6274 0.8895260
## 6292 -0.8587611
## 6309 0.8823611
## 6311 0.9507821
## 6315 0.8516393
## 6322 -0.8586325
## 6324 -0.8992771
## 6329 0.9349576
## 6366 -0.8712226
## 6374 0.8546523
## 6389 -0.8615917
## 6407 -0.8933945
## 6409 -0.8605906
## 6417 0.8620690
## 6427 -0.8899248
## 6436 -0.9311509
## 6459 0.9485002
## 6461 0.9296235
## 6462 -0.9296235
## 6470 0.8880383
## 6471 0.9277969
## 6482 0.9327395
## 6493 0.9338748
## 6499 0.9053212
## 6510 -0.8933945
## 6513 0.9347546
## 6519 0.9030471
## 6521 -0.9003878
## 6523 0.8669981
## 6529 0.9168102
## 6531 0.9435841
## 6532 -0.9252107
## 6537 0.8555272
## 6540 0.8895127
## 6560 0.8594506
## 6572 0.8545596
## 6579 0.9204578
## 6598 0.9242750
## 6609 0.9036400
## 6620 0.9107344
## 6624 0.9313446
## 6625 0.8944687
## 6632 0.9173707
## 6667 0.9341039
## 6669 0.8573651
## 6670 -0.8573651
## 6678 0.9083823
## 6679 0.8966097
## 6690 0.9061749
## 6701 0.9422748
## 6707 0.8838026
## 6718 -0.8605906
## 6719 0.9347546
## 6727 0.8770747
## 6731 0.8652352
## 6737 0.8896423
## 6739 0.8957062
## 6793 0.8822690
## 6976 0.8795371
## 6995 0.9150332
## 7014 0.9643125
## 7025 0.9301725
## 7032 0.9107344
## 7040 0.9348084
## 7041 0.8734474
## 7048 0.9204382
## 7103 0.8567980
## 7133 0.8823611
## 7224 0.8742613
## 7291 0.9292023
## 7292 -0.8553695
## 7293 0.9028521
## 7294 -0.9028521
## 7302 0.8717703
## 7303 0.9874107
## 7311 0.9476673
## 7314 0.9440636
## 7325 0.9429351
## 7341 0.9507821
## 7343 0.9030471
## 7345 0.8770747
## 7355 0.8853334
## 7361 0.8556579
## 7364 -0.9425605
## 7369 0.9512430
## 7411 0.9264875
## 7430 0.9644753
## 7441 0.8739967
## 7448 0.9313446
## 7452 0.9348084
## 7457 0.9403069
## 7464 0.8823549
## 7467 -0.9109371
## 7476 -0.8613417
## 7499 -0.8723909
## 7501 -0.9073333
## 7502 0.9073333
## 7508 0.8575380
## 7515 0.8698684
## 7534 0.8997230
## 7539 -0.9376310
## 7545 0.8591751
## 7550 0.8620690
## 7551 -0.9003878
## 7552 0.8944687
## 7556 0.8734474
## 7560 0.9403069
## 7569 -0.9423399
## 7571 -0.9779021
## 7572 0.8619107
## 7578 0.8580472
## 7579 0.9149445
## 7580 -0.9233751
## 7718 0.8912864
## 7719 0.9076898
## 7730 0.9511419
## 7741 0.9288129
## 7757 0.8516393
## 7759 0.8669981
## 7761 0.8652352
## 7767 0.8853334
## 7780 -0.8539563
## 7816 0.8904250
## 7826 0.8505623
## 7841 0.8713148
## 8038 0.8861449
## 8046 0.9511375
## 8055 0.9057629
## 8197 0.8524422
## 8243 0.9641935
## 8262 0.9325304
## 8273 0.8640974
## 8280 0.9173707
## 8284 0.9204382
## 8288 0.8823549
## 8331 0.9582330
## 8333 0.9633340
## 8334 -0.9633340
## 8339 0.8641072
## 8340 -0.9237960
## 8342 0.8912617
## 8343 0.8934819
## 8354 0.9211390
## 8365 0.8563121
## 8371 0.9987161
## 8383 0.9168102
## 8385 0.8896423
## 8391 0.8556579
## 8393 -0.9423399
## 8403 0.9558808
## 8404 -0.9333600
## 8409 0.8808230
## 8410 -0.8999025
## 8436 0.9708343
## 8437 -0.8845332
## 8438 0.8845332
## 8440 0.8528933
## 8448 0.9400409
## 8476 0.9932060
## 8485 -0.8586325
## 8508 0.9209462
## 8513 -0.8576602
## 8514 0.9253651
## 8539 0.9113811
## 8541 0.9428282
## 8542 -0.9428282
## 8548 -0.8563477
## 8550 0.8897374
## 8551 0.8641769
## 8562 0.9058417
## 8573 0.8552192
## 8579 0.9474154
## 8585 -0.8742453
## 8590 -0.8899248
## 8591 0.9435841
## 8593 0.8957062
## 8600 -0.9109371
## 8601 -0.9779021
## 8609 0.9558808
## 8612 -0.9119276
## 8618 -0.8885370
## 8619 -0.8868385
## 8620 0.9108639
## 8643 -0.9307463
## 8644 0.9262470
## 8645 -0.9880744
## 8646 0.9880744
## 8651 -0.8920524
## 8652 0.8985082
## 8654 -0.8652567
## 8655 -0.9599913
## 8656 0.8516399
## 8663 -0.8574849
## 8666 -0.9613752
## 8677 -0.8797664
## 8683 -0.9198038
## 8684 0.8828843
## 8693 -0.8992771
## 8695 -0.9252107
## 8703 -0.9425605
## 8705 0.8619107
## 8707 -0.8539563
## 8713 -0.9333600
## 8714 0.9209462
## 8715 -0.9119276
## 8721 -0.9543147
## 8722 0.9446555
## 8784 0.8927265
## 8815 0.8524422
## 8833 -0.8502340
## 8839 -0.8502340
## 9065 0.8535079
## 9091 0.8641738
## 9163 0.9046146
## 9165 0.9361662
## 9166 -0.9361662
## 9171 0.9323863
## 9172 -0.8640713
## 9175 0.9499108
## 9183 0.8813586
## 9186 0.9089071
## 9202 0.8632672
## 9203 0.8721006
## 9213 0.9349576
## 9215 0.8555272
## 9223 0.9512430
## 9233 0.8808230
## 9234 -0.8576602
## 9236 -0.9543147
## 9242 -0.8662924
## 9268 0.9147621
## 9269 -0.9532363
## 9270 0.9532363
## 9275 -0.8799688
## 9276 0.9320170
## 9290 -0.8990171
## 9307 -0.8877447
## 9308 0.8905173
## 9329 0.8580472
## 9337 -0.8999025
## 9338 0.9253651
## 9339 -0.8885370
## 9340 0.9446555
## 9345 -0.8662924
## 9368 0.9105507
## 9417 0.8531051
## 9433 0.9149445
## 9443 -0.8868385
## 9510 -0.8601764
## 9526 -0.9311509
## 9527 0.8895127
## 9536 -0.8613417
## 9537 -0.9233751
## 9547 0.9108639
## 9881 0.8916220
## 9913 0.8759197
## 9928 0.9044191
## 9977 -0.9170017
## 9979 0.9741924
## 9981 1.0000000
## 9983 -0.9170017
## 9988 0.8577641
## 10032 -0.9355816
## 10069 -0.8502340
## 10080 -0.9170017
## 10083 -0.9833717
## 10085 -0.9170017
## 10087 1.0000000
## 10190 1.0000000
## 10192 0.9520230
## 10240 0.9411901
## 10288 0.9741924
## 10289 -0.9833717
## 10293 0.9741924
## 10295 -0.9833717
## 10401 0.8916220
## 10433 0.8759197
## 10448 0.9044191
## 10496 1.0000000
## 10497 -0.9170017
## 10499 0.9741924
## 10503 -0.9170017
## 10602 1.0000000
## 10608 0.9520230
## 10612 0.8577641
## 10656 -0.9355816
## 10693 -0.8502340
## 10704 -0.9170017
## 10705 1.0000000
## 10707 -0.9833717
## 10709 -0.9170017
## 10810 0.9520230
## 10814 0.9520230
# Auto-print the data set as a quick visual check before modelling.
match_data_special
## # A tibble: 52,847 × 115
## fixture_id halftime current_time half_start_datetime
## <dbl> <chr> <dttm> <dttm>
## 1 19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2 19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3 19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4 19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5 19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6 19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## 7 19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
## 8 19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
## 9 19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10 19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 52,837 more rows
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## # second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## # stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## # `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## # `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## # `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
library(dplyr)
library(rpart)
library(rpart.plot)
library(caret)
## Loading required package: lattice
library(ggplot2)
# Names of every numeric column in match_data_special; the predictor list
# for the decision tree is derived from this vector.
numerical_columns <- colnames(
  select(match_data_special, where(is.numeric))
)

# One data set per half of the match, split on the `halftime` label.
first_half_data <- filter(match_data_special, halftime == "1st-half")
second_half_data <- filter(match_data_special, halftime == "2nd-half")
#' Fit and evaluate an rpart classification tree on a random 80/20 split.
#'
#' Selects `tree_columns` from `data` as predictors, uses `data[[target]]` as
#' the class label, draws an 80% training partition with
#' `caret::createDataPartition()`, fits the tree on that partition and scores
#' it on the remaining rows.
#'
#' NOTE(review): the split is made row by row. If `data` holds several rows
#' per match (the `fixture_id` / `minute` columns suggest it does), rows of
#' the same match can end up in both partitions, which would make the
#' reported accuracy optimistic -- confirm whether a per-match split is
#' wanted.
#'
#' @param data Data frame containing the predictor columns and the target.
#' @param tree_columns Character vector of predictor column names.
#' @param target Name (single string) of the class-label column in `data`.
#' @param maxdepth,minsplit,cp Forwarded unchanged to `rpart.control()`.
#' @param seed Seed passed to `set.seed()` before partitioning so the split is
#'   reproducible. Use `NULL` to leave the caller's RNG state untouched.
#' @return A list with elements `model` (the fitted rpart object), `accuracy`
#'   (share of correctly classified test rows), `confusion_matrix`
#'   (Predicted x Actual table over a shared set of class levels) and
#'   `feature_importances` (the model's `variable.importance` vector).
train_decision_tree <- function(data, tree_columns, target, maxdepth = 12,
                                minsplit = 4, cp = 0.004, seed = 42) {
  stopifnot(
    is.character(target),
    length(target) == 1L,
    target %in% names(data)
  )

  X <- data %>% select(all_of(tree_columns))
  y <- data[[target]]

  if (!is.null(seed)) {
    set.seed(seed)
  }
  # With list = FALSE the partition comes back as a one-column matrix;
  # flatten it so it is a plain index vector for both row and vector
  # subsetting.
  train_indices <- as.vector(createDataPartition(y, p = 0.8, list = FALSE))
  X_train <- X[train_indices, ]
  X_test <- X[-train_indices, ]
  y_train <- y[train_indices]
  y_test <- y[-train_indices]

  # Keep this call textually stable: printcp()/print() echo it verbatim.
  decision_tree <- rpart(y_train ~ .,
    data = X_train,
    method = "class",
    control = rpart.control(
      maxdepth = maxdepth,
      minsplit = minsplit,
      cp = cp
    )
  )
  predictions <- predict(decision_tree, newdata = X_test, type = "class")

  # `predictions` is a factor carrying every class seen in training, while
  # `y_test` may be a plain vector that lacks some of them. Put both on one
  # common level set so the table is always square and its diagonal really
  # is "predicted == actual".
  unseen_levels <- setdiff(as.character(y_test), c(levels(predictions), NA))
  class_levels <- c(levels(predictions), unseen_levels)
  confusion_matrix <- table(
    Predicted = factor(predictions, levels = class_levels),
    Actual = factor(y_test, levels = class_levels)
  )
  accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)

  list(
    model = decision_tree,
    accuracy = accuracy,
    confusion_matrix = confusion_matrix,
    feature_importances = decision_tree$variable.importance
  )
}
# Predictors for the tree: every numeric column except the names listed
# below. Names that are not in numerical_columns are simply ignored by
# setdiff().
tree_columns <- setdiff(
  numerical_columns,
  c(
    "P_home", "P_away", "P_draw",
    "P_home_norm", "P_away_norm", "P_draw_norm",
    "name", "Total_odds", "final_score",
    "P_home_minus_P_away", "total_prob",
    "fixture_id", "halftime",
    "Assists - home", "Assists - away",
    "second",
    "Ball Possession % - away",
    "Substitutions - home"
  )
)

# Train and evaluate the first-half tree with the function's default
# hyper-parameters.
first_half_result <- train_decision_tree(
  data = first_half_data,
  tree_columns = tree_columns,
  target = "result"
)
print("First Half Accuracy:")
## [1] "First Half Accuracy:"
print(round(first_half_result[["accuracy"]], digits = 2))
## [1] 0.62
print("First Half Confusion Matrix:")
## [1] "First Half Confusion Matrix:"
print(first_half_result[["confusion_matrix"]])
## Actual
## Predicted 1 2 X
## 1 1668 230 438
## 2 369 1116 367
## X 382 283 634
cat("Feature Importance (First Half):\n")
## Feature Importance (First Half):
# Turn the named importance vector into a two-column table (the vector's
# names also become the row names) and list the largest scores first.
importance_df_first_half <- data.frame(
  Feature = names(first_half_result[["feature_importances"]]),
  Importance = first_half_result[["feature_importances"]]
)
importance_df_first_half <- importance_df_first_half[
  order(-importance_df_first_half[["Importance"]]),
]
print(importance_df_first_half)
## Feature
## 2 2
## 1 1
## X X
## Goals - away Goals - away
## Ball Possession % - home Ball Possession % - home
## Successful Passes Percentage - away Successful Passes Percentage - away
## Goals - home Goals - home
## Shots On Target - away Shots On Target - away
## Shots On Target - home Shots On Target - home
## Shots Total - home Shots Total - home
## Headers - away Headers - away
## Successful Headers - away Successful Headers - away
## Shots Insidebox - home Shots Insidebox - home
## Saves - away Saves - away
## Passes - away Passes - away
## Corners - home Corners - home
## Tackles - home Tackles - home
## minute minute
## Interceptions - home Interceptions - home
## Successful Passes - away Successful Passes - away
## Challenges - home Challenges - home
## Successful Interceptions - home Successful Interceptions - home
## Successful Dribbles - home Successful Dribbles - home
## Accurate Crosses - away Accurate Crosses - away
## Key Passes - home Key Passes - home
## Passes - home Passes - home
## Attacks - away Attacks - away
## Shots Total - away Shots Total - away
## Ball Safe - away Ball Safe - away
## Total Crosses - away Total Crosses - away
## Headers - home Headers - home
## Successful Headers - home Successful Headers - home
## Total Crosses - home Total Crosses - home
## Ball Safe - home Ball Safe - home
## Accurate Crosses - home Accurate Crosses - home
## Attacks - home Attacks - home
## Dangerous Attacks - home Dangerous Attacks - home
## Successful Passes - home Successful Passes - home
## Shots Insidebox - away Shots Insidebox - away
## Shots Blocked - away Shots Blocked - away
## Key Passes - away Key Passes - away
## Goal Attempts - home Goal Attempts - home
## Goal Attempts - away Goal Attempts - away
## Dribble Attempts - home Dribble Attempts - home
## Throwins - away Throwins - away
## Dangerous Attacks - away Dangerous Attacks - away
## Dribble Attempts - away Dribble Attempts - away
## Successful Passes Percentage - home Successful Passes Percentage - home
## Injuries - home Injuries - home
## Throwins - home Throwins - home
## Injuries - away Injuries - away
## Shots Outsidebox - home Shots Outsidebox - home
## Importance
## 2 2684.165778
## 1 2410.840000
## X 776.977267
## Goals - away 493.810993
## Ball Possession % - home 340.865975
## Successful Passes Percentage - away 273.639747
## Goals - home 235.883354
## Shots On Target - away 193.649168
## Shots On Target - home 188.144787
## Shots Total - home 162.734170
## Headers - away 78.941232
## Successful Headers - away 56.281479
## Shots Insidebox - home 53.722623
## Saves - away 52.583416
## Passes - away 46.530768
## Corners - home 45.162033
## Tackles - home 45.120282
## minute 42.985758
## Interceptions - home 42.641307
## Successful Passes - away 40.203392
## Challenges - home 40.132995
## Successful Interceptions - home 39.068780
## Successful Dribbles - home 38.992453
## Accurate Crosses - away 37.664640
## Key Passes - home 35.754829
## Passes - home 34.953696
## Attacks - away 34.414704
## Shots Total - away 32.805566
## Ball Safe - away 32.748627
## Total Crosses - away 30.526128
## Headers - home 28.662979
## Successful Headers - home 28.240373
## Total Crosses - home 27.357770
## Ball Safe - home 24.984692
## Accurate Crosses - home 24.952862
## Attacks - home 24.424101
## Dangerous Attacks - home 24.423344
## Successful Passes - home 21.078971
## Shots Insidebox - away 18.636558
## Shots Blocked - away 16.992156
## Key Passes - away 16.209107
## Goal Attempts - home 15.119241
## Goal Attempts - away 13.718231
## Dribble Attempts - home 12.921918
## Throwins - away 10.402764
## Dangerous Attacks - away 7.839017
## Dribble Attempts - away 7.086955
## Successful Passes Percentage - home 6.020380
## Injuries - home 5.428917
## Throwins - home 4.239466
## Injuries - away 2.977148
## Shots Outsidebox - home 2.947104
# Print a heading, then the complexity-parameter (CP) table of the fitted
# first-half tree together with the variables actually used for splitting.
print("Decision Tree Details (First Half):")
## [1] "Decision Tree Details (First Half):"
printcp(first_half_result$model) # Shows which parameters the tree was split on
##
## Classification tree:
## rpart(formula = y_train ~ ., data = X_train, method = "class",
## control = rpart.control(maxdepth = maxdepth, minsplit = minsplit,
## cp = cp))
##
## Variables actually used in tree construction:
## [1] 1 2
## [3] Accurate Crosses - away Ball Possession % - home
## [5] Ball Safe - away Corners - home
## [7] Headers - away Interceptions - home
## [9] Shots On Target - home Successful Dribbles - home
## [11] Successful Passes Percentage - away Tackles - home
## [13] X
##
## Root node error: 12275/21954 = 0.55912
##
## n= 21954
##
## CP nsplit rel error xerror xstd
## 1 0.238126 0 1.00000 1.00000 0.0059930
## 2 0.004372 1 0.76187 0.76660 0.0059736
## 3 0.004334 11 0.70623 0.72049 0.0059203
## 4 0.004000 18 0.67022 0.68424 0.0058666
summary(first_half_result$model) # Prints the detailed structure of the decision tree
## Call:
## rpart(formula = y_train ~ ., data = X_train, method = "class",
## control = rpart.control(maxdepth = maxdepth, minsplit = minsplit,
## cp = cp))
## n= 21954
##
## CP nsplit rel error xerror xstd
## 1 0.238126273 0 1.0000000 1.0000000 0.005993047
## 2 0.004372030 1 0.7618737 0.7665988 0.005973578
## 3 0.004334012 11 0.7062322 0.7204888 0.005920350
## 4 0.004000000 18 0.6702240 0.6842363 0.005866583
##
## Variable importance
## 2 1
## 30 27
## X Goals - away
## 9 6
## Ball Possession % - home Successful Passes Percentage - away
## 4 3
## Goals - home Shots On Target - away
## 3 2
## Shots On Target - home Shots Total - home
## 2 2
## Headers - away Successful Headers - away
## 1 1
## Shots Insidebox - home Saves - away
## 1 1
## Passes - away Corners - home
## 1 1
## Tackles - home
## 1
##
## Node number 1: 21954 observations, complexity param=0.2381263
## predicted class=1 expected loss=0.5591236 P(node) =1
## class counts: 9679 6517 5758
## probabilities: 0.441 0.297 0.262
## left son=2 (14100 obs) right son=3 (7854 obs)
## Primary splits:
## 2 < 2.61 to the right, improve=1631.0870, (0 missing)
## 1 < 1.925 to the left, improve=1621.5490, (0 missing)
## Goals - home < 0.5 to the right, improve= 675.1070, (0 missing)
## X < 4.875 to the right, improve= 384.1266, (0 missing)
## Goals - away < 0.5 to the left, improve= 318.0188, (0 missing)
## Surrogate splits:
## 1 < 2.775 to the left, agree=0.957, adj=0.880, (0 split)
## Goals - away < 0.5 to the left, agree=0.738, adj=0.268, (0 split)
## Ball Possession % - home < 38.5 to the right, agree=0.697, adj=0.154, (0 split)
## Successful Passes Percentage - away < 86.5 to the left, agree=0.686, adj=0.122, (0 split)
## Shots On Target - away < 1.5 to the left, agree=0.682, adj=0.111, (0 split)
##
## Node number 2: 14100 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.418156 P(node) =0.642252
## class counts: 8204 2119 3777
## probabilities: 0.582 0.150 0.268
## left son=4 (4959 obs) right son=5 (9141 obs)
## Primary splits:
## 2 < 6.75 to the right, improve=676.0262, (0 missing)
## 1 < 1.42 to the left, improve=614.7590, (0 missing)
## X < 4.415 to the right, improve=576.4455, (0 missing)
## Goals - home < 0.5 to the right, improve=237.6263, (0 missing)
## Shots On Target - home < 1.5 to the right, improve=105.3575, (0 missing)
## Surrogate splits:
## 1 < 1.515 to the left, agree=0.960, adj=0.886, (0 split)
## X < 4.265 to the right, agree=0.913, adj=0.753, (0 split)
## Goals - home < 0.5 to the right, agree=0.770, adj=0.346, (0 split)
## Shots On Target - home < 1.5 to the right, agree=0.712, adj=0.180, (0 split)
## Shots Total - home < 5.5 to the right, agree=0.700, adj=0.146, (0 split)
##
## Node number 3: 7854 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.4400306 P(node) =0.357748
## class counts: 1475 4398 1981
## probabilities: 0.188 0.560 0.252
## left son=6 (6000 obs) right son=7 (1854 obs)
## Primary splits:
## 2 < 1.42 to the right, improve=217.70540, (0 missing)
## X < 4.265 to the left, improve=211.15870, (0 missing)
## 1 < 7.25 to the left, improve=202.93600, (0 missing)
## Goals - away < 1.5 to the right, improve= 60.46323, (0 missing)
## Goals - home < 0.5 to the right, improve= 51.34057, (0 missing)
## Surrogate splits:
## 1 < 7.75 to the left, agree=0.972, adj=0.882, (0 split)
## X < 4.415 to the left, agree=0.969, adj=0.868, (0 split)
## Goals - away < 1.5 to the left, agree=0.826, adj=0.262, (0 split)
## Shots Total - away < 7.5 to the left, agree=0.780, adj=0.069, (0 split)
## Shots On Target - away < 3.5 to the left, agree=0.778, adj=0.061, (0 split)
##
## Node number 4: 4959 observations
## predicted class=1 expected loss=0.1774551 P(node) =0.2258814
## class counts: 4079 283 597
## probabilities: 0.823 0.057 0.120
##
## Node number 5: 9141 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.5487365 P(node) =0.4163706
## class counts: 4125 1836 3180
## probabilities: 0.451 0.201 0.348
## left son=10 (3459 obs) right son=11 (5682 obs)
## Primary splits:
## 1 < 1.925 to the left, improve=80.40346, (0 missing)
## 2 < 3.775 to the right, improve=64.04850, (0 missing)
## X < 3.45 to the right, improve=49.52885, (0 missing)
## Goals - away < 0.5 to the right, improve=39.20633, (0 missing)
## Fouls - home < 3.5 to the left, improve=35.68039, (0 missing)
## Surrogate splits:
## 2 < 4.1 to the right, agree=0.902, adj=0.742, (0 split)
## X < 3.45 to the right, agree=0.823, adj=0.533, (0 split)
## Ball Possession % - home < 64.5 to the right, agree=0.670, adj=0.128, (0 split)
## Successful Passes Percentage - home < 89.5 to the right, agree=0.650, adj=0.075, (0 split)
## Attacks - away < 7.5 to the left, agree=0.636, adj=0.039, (0 split)
##
## Node number 6: 6000 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.5155 P(node) =0.2732987
## class counts: 1373 2907 1720
## probabilities: 0.229 0.484 0.287
## left son=12 (2248 obs) right son=13 (3752 obs)
## Primary splits:
## 1 < 3.55 to the left, improve=77.02462, (0 missing)
## Successful Dribbles - home < 1.5 to the left, improve=66.21267, (0 missing)
## Shots On Target - home < 0.5 to the left, improve=55.00707, (0 missing)
## Throwins - away < 8.5 to the left, improve=51.07116, (0 missing)
## Goals - away < 0.5 to the right, improve=46.61765, (0 missing)
## Surrogate splits:
## 2 < 2.15 to the right, agree=0.863, adj=0.635, (0 split)
## Successful Passes - away < 41.5 to the left, agree=0.652, adj=0.072, (0 split)
## Ball Possession % - home < 64.5 to the right, agree=0.651, adj=0.069, (0 split)
## Passes - away < 40.5 to the left, agree=0.649, adj=0.062, (0 split)
## Attacks - away < 11.5 to the left, agree=0.646, adj=0.056, (0 split)
##
## Node number 7: 1854 observations
## predicted class=2 expected loss=0.1957929 P(node) =0.0844493
## class counts: 102 1491 261
## probabilities: 0.055 0.804 0.141
##
## Node number 10: 3459 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.4518647 P(node) =0.1575567
## class counts: 1896 480 1083
## probabilities: 0.548 0.139 0.313
## left son=20 (446 obs) right son=21 (3013 obs)
## Primary splits:
## Headers - away < 8.5 to the right, improve=41.78433, (0 missing)
## Successful Passes Percentage - home < 82.5 to the left, improve=40.26706, (0 missing)
## Successful Headers - away < 5.5 to the right, improve=35.74819, (0 missing)
## Substitutions - away < 0.5 to the left, improve=31.74343, (0 missing)
## Interceptions - home < 1.5 to the left, improve=25.07350, (0 missing)
## Surrogate splits:
## Successful Headers - away < 4.5 to the right, agree=0.950, adj=0.612, (0 split)
## Headers - home < 9.5 to the right, agree=0.912, adj=0.314, (0 split)
## Successful Headers - home < 5.5 to the right, agree=0.910, adj=0.298, (0 split)
## Total Crosses - away < 8.5 to the right, agree=0.890, adj=0.143, (0 split)
## Throwins - away < 9.5 to the right, agree=0.885, adj=0.108, (0 split)
##
## Node number 11: 5682 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.6077086 P(node) =0.2588139
## class counts: 2229 1356 2097
## probabilities: 0.392 0.239 0.369
## left son=22 (2585 obs) right son=23 (3097 obs)
## Primary splits:
## 2 < 3.225 to the left, improve=41.66928, (0 missing)
## Interceptions - home < 6.5 to the left, improve=34.53119, (0 missing)
## Challenges - home < 6.5 to the left, improve=33.62570, (0 missing)
## Total Crosses - away < 5.5 to the left, improve=33.02818, (0 missing)
## Goal Attempts - home < 2.5 to the right, improve=32.36730, (0 missing)
## Surrogate splits:
## 1 < 2.325 to the right, agree=0.755, adj=0.461, (0 split)
## Successful Passes Percentage - away < 85.5 to the right, agree=0.616, adj=0.156, (0 split)
## Accurate Crosses - home < 0.5 to the left, agree=0.601, adj=0.124, (0 split)
## Attacks - home < 22.5 to the left, agree=0.600, adj=0.120, (0 split)
## Throwins - home < 4.5 to the left, agree=0.591, adj=0.102, (0 split)
##
## Node number 12: 2248 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.6107651 P(node) =0.1023959
## class counts: 762 875 611
## probabilities: 0.339 0.389 0.272
## left son=24 (2076 obs) right son=25 (172 obs)
## Primary splits:
## Successful Dribbles - home < 3.5 to the left, improve=38.99245, (0 missing)
## Corners - home < 2.5 to the right, improve=33.21658, (0 missing)
## Successful Passes Percentage - away < 86.5 to the left, improve=27.99142, (0 missing)
## Successful Passes Percentage - home < 80.5 to the right, improve=26.08567, (0 missing)
## X < 3.55 to the right, improve=25.86873, (0 missing)
## Surrogate splits:
## Dribble Attempts - home < 7.5 to the left, agree=0.949, adj=0.331, (0 split)
## Attacks - home < 55.5 to the left, agree=0.932, adj=0.116, (0 split)
## Shots Outsidebox - home < 4.5 to the left, agree=0.929, adj=0.076, (0 split)
## Dangerous Attacks - home < 33.5 to the left, agree=0.928, adj=0.064, (0 split)
## Saves - away < 2.5 to the left, agree=0.928, adj=0.058, (0 split)
##
## Node number 13: 3752 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.4584222 P(node) =0.1709028
## class counts: 611 2032 1109
## probabilities: 0.163 0.542 0.296
## left son=26 (1925 obs) right son=27 (1827 obs)
## Primary splits:
## Shots On Target - home < 0.5 to the left, improve=64.10606, (0 missing)
## Successful Dribbles - home < 1.5 to the right, improve=48.32103, (0 missing)
## Shots On Target - away < 2.5 to the right, improve=47.72801, (0 missing)
## Passes - away < 235.5 to the left, improve=46.62827, (0 missing)
## Throwins - away < 9.5 to the left, improve=46.60922, (0 missing)
## Surrogate splits:
## Saves - away < 0.5 to the left, agree=0.895, adj=0.785, (0 split)
## Shots Total - home < 1.5 to the left, agree=0.820, adj=0.630, (0 split)
## Key Passes - home < 1.5 to the left, agree=0.785, adj=0.558, (0 split)
## Shots Insidebox - home < 1.5 to the left, agree=0.771, adj=0.530, (0 split)
## minute < 23.5 to the left, agree=0.721, adj=0.428, (0 split)
##
## Node number 20: 446 observations
## predicted class=1 expected loss=0.2511211 P(node) =0.0203152
## class counts: 334 63 49
## probabilities: 0.749 0.141 0.110
##
## Node number 21: 3013 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.4815798 P(node) =0.1372415
## class counts: 1562 417 1034
## probabilities: 0.518 0.138 0.343
## left son=42 (2027 obs) right son=43 (986 obs)
## Primary splits:
## Interceptions - home < 1.5 to the left, improve=42.64131, (0 missing)
## Challenges - away < 5.5 to the left, improve=42.00108, (0 missing)
## Challenges - home < 1.5 to the left, improve=41.66064, (0 missing)
## Interceptions - away < 5.5 to the left, improve=40.81529, (0 missing)
## Successful Interceptions - home < 4.5 to the left, improve=37.80986, (0 missing)
## Surrogate splits:
## Challenges - home < 1.5 to the left, agree=0.981, adj=0.941, (0 split)
## Successful Interceptions - home < 1.5 to the left, agree=0.799, adj=0.384, (0 split)
## Passes - home < 114.5 to the left, agree=0.789, adj=0.356, (0 split)
## Attacks - home < 29.5 to the left, agree=0.787, adj=0.349, (0 split)
## Passes - away < 92.5 to the left, agree=0.783, adj=0.338, (0 split)
##
## Node number 22: 2585 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.5907157 P(node) =0.1177462
## class counts: 1058 762 765
## probabilities: 0.409 0.295 0.296
## left son=44 (1248 obs) right son=45 (1337 obs)
## Primary splits:
## Corners - home < 0.5 to the right, improve=45.16203, (0 missing)
## Long Passes - home < 3.5 to the right, improve=43.58502, (0 missing)
## Headers - away < 2.5 to the right, improve=37.26344, (0 missing)
## Total Crosses - home < 4.5 to the left, improve=33.17211, (0 missing)
## Corners - away < 2.5 to the right, improve=31.75284, (0 missing)
## Surrogate splits:
## Total Crosses - home < 2.5 to the right, agree=0.810, adj=0.606, (0 split)
## Shots Total - home < 1.5 to the right, agree=0.767, adj=0.518, (0 split)
## Dangerous Attacks - home < 7.5 to the right, agree=0.752, adj=0.486, (0 split)
## Accurate Crosses - home < 0.5 to the right, agree=0.729, adj=0.438, (0 split)
## Shots Insidebox - home < 1.5 to the right, agree=0.728, adj=0.437, (0 split)
##
## Node number 23: 3097 observations, complexity param=0.00437203
## predicted class=X expected loss=0.5699064 P(node) =0.1410677
## class counts: 1171 594 1332
## probabilities: 0.378 0.192 0.430
## left son=46 (561 obs) right son=47 (2536 obs)
## Primary splits:
## Ball Safe - away < 12.5 to the right, improve=32.74863, (0 missing)
## Fouls - away < 6.5 to the left, improve=29.66562, (0 missing)
## Counter Attacks - home < 2.5 to the left, improve=25.11336, (0 missing)
## Ball Safe - home < 7.5 to the right, improve=24.04746, (0 missing)
## Headers - home < 1.5 to the left, improve=23.04691, (0 missing)
## Surrogate splits:
## Ball Safe - home < 15.5 to the right, agree=0.957, adj=0.763, (0 split)
## Goal Attempts - home < 0.5 to the right, agree=0.902, adj=0.462, (0 split)
## Goal Attempts - away < 1.5 to the right, agree=0.895, adj=0.419, (0 split)
## Injuries - home < 0.5 to the right, agree=0.849, adj=0.166, (0 split)
## Injuries - away < 0.5 to the right, agree=0.835, adj=0.091, (0 split)
##
## Node number 24: 2076 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.5833333 P(node) =0.09456136
## class counts: 701 865 510
## probabilities: 0.338 0.417 0.246
## left son=48 (1403 obs) right son=49 (673 obs)
## Primary splits:
## Successful Passes Percentage - away < 86.5 to the left, improve=40.17345, (0 missing)
## Throwins - home < 8.5 to the left, improve=30.17830, (0 missing)
## Shots Total - away < 6.5 to the right, improve=25.14732, (0 missing)
## Corners - home < 2.5 to the right, improve=24.26579, (0 missing)
## X < 3.55 to the right, improve=22.53667, (0 missing)
## Surrogate splits:
## Ball Possession % - home < 32.5 to the right, agree=0.728, adj=0.160, (0 split)
## Successful Passes - away < 113.5 to the left, agree=0.723, adj=0.146, (0 split)
## Passes - away < 156.5 to the left, agree=0.703, adj=0.083, (0 split)
## Goals - home < 0.5 to the left, agree=0.693, adj=0.052, (0 split)
## Shots On Target - home < 2.5 to the left, agree=0.692, adj=0.051, (0 split)
##
## Node number 25: 172 observations
## predicted class=X expected loss=0.4127907 P(node) =0.007834563
## class counts: 61 10 101
## probabilities: 0.355 0.058 0.587
##
## Node number 26: 1925 observations
## predicted class=2 expected loss=0.3844156 P(node) =0.08768334
## class counts: 364 1185 376
## probabilities: 0.189 0.616 0.195
##
## Node number 27: 1827 observations, complexity param=0.004334012
## predicted class=2 expected loss=0.5363985 P(node) =0.08321946
## class counts: 247 847 733
## probabilities: 0.135 0.464 0.401
## left son=54 (1152 obs) right son=55 (675 obs)
## Primary splits:
## Ball Possession % - home < 41.5 to the right, improve=67.75384, (0 missing)
## Successful Passes Percentage - home < 72.5 to the right, improve=51.82687, (0 missing)
## Successful Passes Percentage - away < 83.5 to the left, improve=50.34655, (0 missing)
## Fouls - away < 1.5 to the right, improve=49.70352, (0 missing)
## Long Passes - home < 3.5 to the right, improve=31.55224, (0 missing)
## Surrogate splits:
## Successful Passes Percentage - away < 86.5 to the left, agree=0.779, adj=0.401, (0 split)
## Successful Passes - away < 192.5 to the left, agree=0.770, adj=0.376, (0 split)
## Passes - away < 217.5 to the left, agree=0.761, adj=0.354, (0 split)
## Successful Passes - home < 83.5 to the right, agree=0.745, adj=0.311, (0 split)
## Passes - home < 104.5 to the right, agree=0.738, adj=0.292, (0 split)
##
## Node number 42: 2027 observations
## predicted class=1 expected loss=0.4158855 P(node) =0.09232942
## class counts: 1184 243 600
## probabilities: 0.584 0.120 0.296
##
## Node number 43: 986 observations, complexity param=0.00437203
## predicted class=X expected loss=0.5598377 P(node) =0.04491209
## class counts: 378 174 434
## probabilities: 0.383 0.176 0.440
## left son=86 (481 obs) right son=87 (505 obs)
## Primary splits:
## Accurate Crosses - away < 0.5 to the left, improve=37.66464, (0 missing)
## Successful Passes Percentage - home < 80.5 to the left, improve=35.59980, (0 missing)
## Interceptions - away < 5.5 to the left, improve=34.58036, (0 missing)
## Challenges - away < 5.5 to the left, improve=33.92129, (0 missing)
## Goal Attempts - home < 3.5 to the left, improve=32.86290, (0 missing)
## Surrogate splits:
## Shots Insidebox - away < 0.5 to the left, agree=0.754, adj=0.495, (0 split)
## Shots Total - away < 1.5 to the left, agree=0.742, adj=0.472, (0 split)
## Total Crosses - away < 2.5 to the left, agree=0.733, adj=0.453, (0 split)
## Shots Blocked - away < 0.5 to the left, agree=0.732, adj=0.451, (0 split)
## Key Passes - away < 0.5 to the left, agree=0.722, adj=0.430, (0 split)
##
## Node number 44: 1248 observations, complexity param=0.00437203
## predicted class=2 expected loss=0.59375 P(node) =0.05684613
## class counts: 449 507 292
## probabilities: 0.360 0.406 0.234
## left son=88 (191 obs) right son=89 (1057 obs)
## Primary splits:
## Tackles - home < 7.5 to the right, improve=45.12028, (0 missing)
## Dribble Attempts - away < 3.5 to the right, improve=32.52919, (0 missing)
## Goals - away < 0.5 to the right, improve=28.48359, (0 missing)
## Challenges - home < 7.5 to the left, improve=28.15158, (0 missing)
## Shots On Target - away < 0.5 to the right, improve=27.36109, (0 missing)
## Surrogate splits:
## Successful Interceptions - home < 7.5 to the right, agree=0.924, adj=0.503, (0 split)
## Dribble Attempts - away < 10.5 to the right, agree=0.871, adj=0.157, (0 split)
## Throwins - away < 11.5 to the right, agree=0.867, adj=0.131, (0 split)
## Attacks - away < 56.5 to the right, agree=0.858, adj=0.073, (0 split)
## Successful Passes - away < 251 to the right, agree=0.858, adj=0.073, (0 split)
##
## Node number 45: 1337 observations, complexity param=0.00437203
## predicted class=1 expected loss=0.5445026 P(node) =0.06090006
## class counts: 609 255 473
## probabilities: 0.455 0.191 0.354
## left son=90 (478 obs) right son=91 (859 obs)
## Primary splits:
## Headers - away < 2.5 to the right, improve=37.15690, (0 missing)
## Shots Off Target - away < 1.5 to the right, improve=36.09413, (0 missing)
## Long Passes - home < 4.5 to the right, improve=32.79158, (0 missing)
## Shots Total - away < 2.5 to the right, improve=23.74699, (0 missing)
## Key Passes - away < 3.5 to the right, improve=23.02399, (0 missing)
## Surrogate splits:
## Successful Headers - away < 1.5 to the right, agree=0.938, adj=0.826, (0 split)
## Attacks - away < 24.5 to the right, agree=0.801, adj=0.444, (0 split)
## Successful Headers - home < 1.5 to the right, agree=0.794, adj=0.425, (0 split)
## minute < 20.5 to the right, agree=0.792, adj=0.418, (0 split)
## Headers - home < 2.5 to the right, agree=0.792, adj=0.418, (0 split)
##
## Node number 46: 561 observations
## predicted class=1 expected loss=0.4474153 P(node) =0.02555343
## class counts: 310 77 174
## probabilities: 0.553 0.137 0.310
##
## Node number 47: 2536 observations
## predicted class=X expected loss=0.5433754 P(node) =0.1155143
## class counts: 861 517 1158
## probabilities: 0.340 0.204 0.457
##
## Node number 48: 1403 observations
## predicted class=2 expected loss=0.5067712 P(node) =0.06390635
## class counts: 442 692 269
## probabilities: 0.315 0.493 0.192
##
## Node number 49: 673 observations, complexity param=0.004334012
## predicted class=1 expected loss=0.615156 P(node) =0.03065501
## class counts: 259 173 241
## probabilities: 0.385 0.257 0.358
## left son=98 (285 obs) right son=99 (388 obs)
## Primary splits:
## X < 3.325 to the right, improve=36.03419, (0 missing)
## Accurate Crosses - home < 0.5 to the left, improve=28.52197, (0 missing)
## Successful Interceptions - home < 2.5 to the right, improve=22.93164, (0 missing)
## Goals - away < 0.5 to the right, improve=22.81888, (0 missing)
## 2 < 2.225 to the left, improve=22.46129, (0 missing)
## Surrogate splits:
## 2 < 2.15 to the left, agree=0.684, adj=0.253, (0 split)
## 1 < 3.05 to the left, agree=0.672, adj=0.225, (0 split)
## Dangerous Attacks - away < 6.5 to the left, agree=0.669, adj=0.218, (0 split)
## Total Crosses - away < 2.5 to the left, agree=0.664, adj=0.207, (0 split)
## Attacks - away < 16.5 to the left, agree=0.661, adj=0.200, (0 split)
##
## Node number 54: 1152 observations
## predicted class=2 expected loss=0.4678819 P(node) =0.05247335
## class counts: 215 613 324
## probabilities: 0.187 0.532 0.281
##
## Node number 55: 675 observations
## predicted class=X expected loss=0.3940741 P(node) =0.03074611
## class counts: 32 234 409
## probabilities: 0.047 0.347 0.606
##
## Node number 86: 481 observations
## predicted class=1 expected loss=0.4906445 P(node) =0.02190945
## class counts: 245 98 138
## probabilities: 0.509 0.204 0.287
##
## Node number 87: 505 observations
## predicted class=X expected loss=0.4138614 P(node) =0.02300264
## class counts: 133 76 296
## probabilities: 0.263 0.150 0.586
##
## Node number 88: 191 observations
## predicted class=1 expected loss=0.4031414 P(node) =0.008700009
## class counts: 114 9 68
## probabilities: 0.597 0.047 0.356
##
## Node number 89: 1057 observations
## predicted class=2 expected loss=0.5288553 P(node) =0.04814612
## class counts: 335 498 224
## probabilities: 0.317 0.471 0.212
##
## Node number 90: 478 observations
## predicted class=1 expected loss=0.3702929 P(node) =0.0217728
## class counts: 301 72 105
## probabilities: 0.630 0.151 0.220
##
## Node number 91: 859 observations
## predicted class=X expected loss=0.5715949 P(node) =0.03912727
## class counts: 308 183 368
## probabilities: 0.359 0.213 0.428
##
## Node number 98: 285 observations
## predicted class=1 expected loss=0.3964912 P(node) =0.01298169
## class counts: 172 49 64
## probabilities: 0.604 0.172 0.225
##
## Node number 99: 388 observations
## predicted class=X expected loss=0.5438144 P(node) =0.01767332
## class counts: 87 124 177
## probabilities: 0.224 0.320 0.456
# Plot the first-half decision tree.
# The model was fitted without model = TRUE (see the rpart() call echoed by
# printcp/summary), so rpart.plot cannot retrieve the training data and warns
# that it cannot determine roundint. Passing roundint = FALSE explicitly is the
# remedy the warning itself recommends and silences it.
# NOTE(review): the plot is expected to be unchanged, since roundint could not
# be applied without the data anyway -- confirm against the rpart.plot docs.
rpart.plot(
  first_half_result$model,
  main = "First Half Decision Tree",
  type = 3,
  extra = 104,
  roundint = FALSE
)
# Fit the second-half decision tree with the train_decision_tree helper (not
# defined in this part of the file) and report its accuracy and confusion
# matrix.
# NOTE(review): "result" is presumably the name of the target column -- confirm
# against the helper's signature.
second_half_result <- train_decision_tree(second_half_data, tree_columns, "result")
print("Second Half Accuracy:")
## [1] "Second Half Accuracy:"
# Accuracy is rounded to two decimals for display only.
print(round(second_half_result$accuracy, 2))
## [1] 0.74
print("Second Half Confusion Matrix:")
## [1] "Second Half Confusion Matrix:"
# Rows are predicted classes, columns are actual classes.
# NOTE(review): the matrix totals 5080 observations while the tree was fitted
# on n = 20326, so it is presumably computed on a held-out split -- confirm in
# train_decision_tree.
print(second_half_result$confusion_matrix)
## Actual
## Predicted 1 2 X
## 1 1830 119 411
## 2 69 1138 279
## X 250 205 779
# Second-half feature importances: pair each score with its feature name,
# sort from most to least important, and print the resulting table.
cat("Feature Importance (Second Half):\n")
## Feature Importance (Second Half):
importance_df_second_half <- data.frame(
  Feature = names(second_half_result$feature_importances),
  Importance = second_half_result$feature_importances
)
# Descending sort; tied scores keep their original relative order.
importance_df_second_half <- importance_df_second_half[
  order(importance_df_second_half[["Importance"]], decreasing = TRUE),
]
print(importance_df_second_half)
## Feature
## 1 1
## 2 2
## X X
## Goals - home Goals - home
## Goals - away Goals - away
## Shots On Target - home Shots On Target - home
## Shots On Target - away Shots On Target - away
## minute minute
## Shots Insidebox - away Shots Insidebox - away
## Substitutions - away Substitutions - away
## Attacks - away Attacks - away
## Successful Passes - away Successful Passes - away
## Passes - away Passes - away
## Ball Possession % - home Ball Possession % - home
## Counter Attacks - away Counter Attacks - away
## Shots Off Target - away Shots Off Target - away
## Successful Passes Percentage - away Successful Passes Percentage - away
## Successful Passes Percentage - home Successful Passes Percentage - home
## Total Crosses - home Total Crosses - home
## Counter Attacks - home Counter Attacks - home
## Throwins - home Throwins - home
## Shots Total - away Shots Total - away
## Saves - away Saves - away
## Offsides - home Offsides - home
## Total Crosses - away Total Crosses - away
## Dangerous Attacks - away Dangerous Attacks - away
## Importance
## 1 4757.1307064
## 2 4232.6376530
## X 2728.8005056
## Goals - home 1423.3571831
## Goals - away 1417.5046962
## Shots On Target - home 522.9331682
## Shots On Target - away 224.5596169
## minute 167.2934528
## Shots Insidebox - away 135.5681141
## Substitutions - away 114.5769323
## Attacks - away 95.7634486
## Successful Passes - away 56.3311583
## Passes - away 53.3804034
## Ball Possession % - home 37.0335593
## Counter Attacks - away 33.4242646
## Shots Off Target - away 30.5672919
## Successful Passes Percentage - away 27.1198813
## Successful Passes Percentage - home 10.1907121
## Total Crosses - home 9.8403151
## Counter Attacks - home 2.8245857
## Throwins - home 2.7788447
## Shots Total - away 2.5009602
## Saves - away 1.8830572
## Offsides - home 0.5557689
## Total Crosses - away 0.5557689
## Dangerous Attacks - away 0.2778845
# Print a heading, then the complexity-parameter (CP) table of the fitted
# second-half tree together with the variables actually used for splitting.
print("Decision Tree Details (Second Half):")
## [1] "Decision Tree Details (Second Half):"
printcp(second_half_result$model) # Shows which parameters the tree was split on
##
## Classification tree:
## rpart(formula = y_train ~ ., data = X_train, method = "class",
## control = rpart.control(maxdepth = maxdepth, minsplit = minsplit,
## cp = cp))
##
## Variables actually used in tree construction:
## [1] 1 2 Counter Attacks - away
## [4] Goals - away Shots Off Target - away Successful Passes - away
## [7] X
##
## Root node error: 11729/20326 = 0.57704
##
## n= 20326
##
## CP nsplit rel error xerror xstd
## 1 0.3181857 0 1.00000 1.00000 0.0060051
## 2 0.2042800 1 0.68181 0.68181 0.0059380
## 3 0.0053287 2 0.47753 0.47753 0.0054309
## 4 0.0049876 6 0.45622 0.47165 0.0054100
## 5 0.0040000 8 0.44624 0.45792 0.0053596
summary(second_half_result$model) # Prints the detailed structure of the decision tree
## Call:
## rpart(formula = y_train ~ ., data = X_train, method = "class",
## control = rpart.control(maxdepth = maxdepth, minsplit = minsplit,
## cp = cp))
## n= 20326
##
## CP nsplit rel error xerror xstd
## 1 0.318185694 0 1.0000000 1.0000000 0.006005055
## 2 0.204279990 1 0.6818143 0.6818143 0.005938005
## 3 0.005328673 2 0.4775343 0.4775343 0.005430921
## 4 0.004987637 6 0.4562196 0.4716515 0.005409996
## 5 0.004000000 8 0.4462444 0.4579248 0.005359618
##
## Variable importance
## 1 2 X
## 30 26 17
## Goals - home Goals - away Shots On Target - home
## 9 9 3
## Shots On Target - away minute Shots Insidebox - away
## 1 1 1
## Substitutions - away Attacks - away
## 1 1
##
## Node number 1: 20326 observations, complexity param=0.3181857
## predicted class=1 expected loss=0.5770442 P(node) =1
## class counts: 8597 5852 5877
## probabilities: 0.423 0.288 0.289
## left son=2 (8165 obs) right son=3 (12161 obs)
## Primary splits:
## 1 < 2.15 to the left, improve=3403.4630, (0 missing)
## 2 < 1.925 to the right, improve=2962.0500, (0 missing)
## Goals - home < 0.5 to the right, improve=1518.9900, (0 missing)
## X < 2.685 to the right, improve=1197.2940, (0 missing)
## Goals - away < 1.5 to the left, improve= 933.3042, (0 missing)
## Surrogate splits:
## 2 < 9.25 to the right, agree=0.896, adj=0.740, (0 split)
## Goals - home < 1.5 to the right, agree=0.762, adj=0.408, (0 split)
## X < 3.675 to the right, agree=0.713, adj=0.285, (0 split)
## Goals - away < 0.5 to the left, agree=0.683, adj=0.211, (0 split)
## Shots On Target - home < 4.5 to the right, agree=0.658, adj=0.149, (0 split)
##
## Node number 2: 8165 observations
## predicted class=1 expected loss=0.1740355 P(node) =0.4017023
## class counts: 6744 267 1154
## probabilities: 0.826 0.033 0.141
##
## Node number 3: 12161 observations, complexity param=0.20428
## predicted class=2 expected loss=0.540745 P(node) =0.5982977
## class counts: 1853 5585 4723
## probabilities: 0.152 0.459 0.388
## left son=6 (5204 obs) right son=7 (6957 obs)
## Primary splits:
## 2 < 1.69 to the left, improve=1585.0610, (0 missing)
## X < 3.325 to the right, improve=1558.8880, (0 missing)
## 1 < 16 to the right, improve=1202.3820, (0 missing)
## Goals - away < 1.5 to the right, improve= 562.4236, (0 missing)
## Shots Insidebox - away < 3.5 to the right, improve= 144.6099, (0 missing)
## Surrogate splits:
## X < 3.225 to the right, agree=0.986, adj=0.966, (0 split)
## 1 < 10.5 to the right, agree=0.899, adj=0.763, (0 split)
## Goals - away < 1.5 to the right, agree=0.750, adj=0.415, (0 split)
## Shots On Target - away < 3.5 to the right, agree=0.626, adj=0.126, (0 split)
## Shots Insidebox - away < 6.5 to the right, agree=0.608, adj=0.085, (0 split)
##
## Node number 6: 5204 observations
## predicted class=2 expected loss=0.2052267 P(node) =0.2560268
## class counts: 190 4136 878
## probabilities: 0.037 0.795 0.169
##
## Node number 7: 6957 observations, complexity param=0.005328673
## predicted class=X expected loss=0.4473192 P(node) =0.342271
## class counts: 1663 1449 3845
## probabilities: 0.239 0.208 0.553
## left son=14 (4290 obs) right son=15 (2667 obs)
## Primary splits:
## X < 1.69 to the right, improve=212.0588, (0 missing)
## 2 < 4.875 to the left, improve=179.2189, (0 missing)
## minute < 28.5 to the left, improve=170.7462, (0 missing)
## Substitutions - away < 1.5 to the left, improve=160.2496, (0 missing)
## 1 < 6.25 to the left, improve=151.9271, (0 missing)
## Surrogate splits:
## minute < 29.5 to the left, agree=0.919, adj=0.789, (0 split)
## Substitutions - away < 2.5 to the left, agree=0.824, adj=0.540, (0 split)
## 2 < 6.25 to the left, agree=0.795, adj=0.465, (0 split)
## 1 < 6.25 to the left, agree=0.755, adj=0.361, (0 split)
## Attacks - away < 72.5 to the left, agree=0.737, adj=0.314, (0 split)
##
## Node number 14: 4290 observations, complexity param=0.005328673
## predicted class=X expected loss=0.5596737 P(node) =0.2110597
## class counts: 1255 1146 1889
## probabilities: 0.293 0.267 0.440
## left son=28 (3022 obs) right son=29 (1268 obs)
## Primary splits:
## 1 < 4.165 to the left, improve=67.97437, (0 missing)
## Shots Insidebox - home < 4.5 to the right, improve=51.17675, (0 missing)
## Successful Passes - away < 179.5 to the left, improve=48.93978, (0 missing)
## 2 < 3.325 to the right, improve=46.78760, (0 missing)
## Dribble Attempts - home < 8.5 to the right, improve=46.23388, (0 missing)
## Surrogate splits:
## 2 < 2.81 to the right, agree=0.839, adj=0.454, (0 split)
## Passes - away < 383.5 to the left, agree=0.775, adj=0.238, (0 split)
## Successful Passes - away < 321.5 to the left, agree=0.774, adj=0.235, (0 split)
## Ball Possession % - home < 37.5 to the right, agree=0.762, adj=0.196, (0 split)
## Attacks - away < 77.5 to the left, agree=0.749, adj=0.151, (0 split)
##
## Node number 15: 2667 observations
## predicted class=X expected loss=0.2665917 P(node) =0.1312113
## class counts: 408 303 1956
## probabilities: 0.153 0.114 0.733
##
## Node number 28: 3022 observations, complexity param=0.005328673
## predicted class=X expected loss=0.5622105 P(node) =0.1486766
## class counts: 1062 637 1323
## probabilities: 0.351 0.211 0.438
## left son=56 (1555 obs) right son=57 (1467 obs)
## Primary splits:
## Goals - away < 0.5 to the right, improve=42.83603, (0 missing)
## Shots Insidebox - home < 4.5 to the right, improve=39.32185, (0 missing)
## Counter Attacks - away < 4.5 to the left, improve=31.32226, (0 missing)
## Successful Passes - away < 180.5 to the left, improve=31.01236, (0 missing)
## Accurate Crosses - away < 6.5 to the left, improve=30.36431, (0 missing)
## Surrogate splits:
## Goals - home < 0.5 to the right, agree=0.919, adj=0.834, (0 split)
## Shots On Target - home < 2.5 to the right, agree=0.666, adj=0.312, (0 split)
## Shots On Target - away < 1.5 to the right, agree=0.665, adj=0.311, (0 split)
## Successful Passes Percentage - home < 80.5 to the right, agree=0.630, adj=0.238, (0 split)
## Total Crosses - home < 11.5 to the right, agree=0.626, adj=0.230, (0 split)
##
## Node number 29: 1268 observations, complexity param=0.005328673
## predicted class=X expected loss=0.5536278 P(node) =0.06238315
## class counts: 193 509 566
## probabilities: 0.152 0.401 0.446
## left son=58 (561 obs) right son=59 (707 obs)
## Primary splits:
## Successful Passes - away < 276.5 to the left, improve=40.35611, (0 missing)
## Passes - away < 334.5 to the left, improve=39.59947, (0 missing)
## Shots Insidebox - away < 4.5 to the left, improve=39.25581, (0 missing)
## Tackles - away < 3.5 to the left, improve=35.62239, (0 missing)
## Accurate Crosses - away < 4.5 to the left, improve=30.96487, (0 missing)
## Surrogate splits:
## Passes - away < 320.5 to the left, agree=0.965, adj=0.922, (0 split)
## Successful Passes Percentage - away < 82.5 to the left, agree=0.855, adj=0.672, (0 split)
## Ball Possession % - home < 47.5 to the right, agree=0.818, adj=0.588, (0 split)
## Attacks - away < 66.5 to the left, agree=0.765, adj=0.469, (0 split)
## X < 2.685 to the right, agree=0.736, adj=0.403, (0 split)
##
## Node number 56: 1555 observations, complexity param=0.004987637
## predicted class=1 expected loss=0.5665595 P(node) =0.076503
## class counts: 674 327 554
## probabilities: 0.433 0.210 0.356
## left son=112 (1484 obs) right son=113 (71 obs)
## Primary splits:
## Counter Attacks - away < 4.5 to the left, improve=33.42426, (0 missing)
## Shots Off Target - away < 0.5 to the right, improve=29.42211, (0 missing)
## Dribble Attempts - away < 14.5 to the right, improve=28.19578, (0 missing)
## Dribble Attempts - home < 8.5 to the right, improve=24.64382, (0 missing)
## Shots On Target - away < 0.5 to the right, improve=23.84440, (0 missing)
## Surrogate splits:
## Shots On Target - away < 0.5 to the right, agree=0.969, adj=0.324, (0 split)
## Counter Attacks - home < 4.5 to the left, agree=0.958, adj=0.085, (0 split)
## Saves - away < 7.5 to the left, agree=0.957, adj=0.056, (0 split)
## Shots On Target - home < 9.5 to the left, agree=0.957, adj=0.056, (0 split)
## Shots Insidebox - away < 10.5 to the left, agree=0.956, adj=0.028, (0 split)
##
## Node number 57: 1467 observations
## predicted class=X expected loss=0.475801 P(node) =0.07217357
## class counts: 388 310 769
## probabilities: 0.264 0.211 0.524
##
## Node number 58: 561 observations
## predicted class=2 expected loss=0.4777184 P(node) =0.02760012
## class counts: 105 293 163
## probabilities: 0.187 0.522 0.291
##
## Node number 59: 707 observations
## predicted class=X expected loss=0.4299859 P(node) =0.03478304
## class counts: 88 216 403
## probabilities: 0.124 0.306 0.570
##
## Node number 112: 1484 observations, complexity param=0.004987637
## predicted class=1 expected loss=0.5491914 P(node) =0.07300994
## class counts: 669 274 541
## probabilities: 0.451 0.185 0.365
## left son=224 (1374 obs) right son=225 (110 obs)
## Primary splits:
## Shots Off Target - away < 0.5 to the right, improve=30.56729, (0 missing)
## Dribble Attempts - away < 16.5 to the right, improve=26.76614, (0 missing)
## Free Kicks - home < 6.5 to the right, improve=25.72035, (0 missing)
## Dribble Attempts - home < 8.5 to the right, improve=23.98845, (0 missing)
## Shots Outsidebox - away < 6.5 to the right, improve=22.47260, (0 missing)
## Surrogate splits:
## Throwins - home < 24.5 to the left, agree=0.933, adj=0.091, (0 split)
## Shots Total - away < 1.5 to the right, agree=0.932, adj=0.082, (0 split)
## Offsides - home < 4.5 to the left, agree=0.927, adj=0.018, (0 split)
## Total Crosses - away < 1.5 to the right, agree=0.927, adj=0.018, (0 split)
## Dangerous Attacks - away < 5.5 to the right, agree=0.927, adj=0.009, (0 split)
##
## Node number 113: 71 observations
## predicted class=2 expected loss=0.2535211 P(node) =0.003493063
## class counts: 5 53 13
## probabilities: 0.070 0.746 0.183
##
## Node number 224: 1374 observations
## predicted class=1 expected loss=0.5189229 P(node) =0.06759815
## class counts: 661 249 464
## probabilities: 0.481 0.181 0.338
##
## Node number 225: 110 observations
## predicted class=X expected loss=0.3 P(node) =0.005411788
## class counts: 8 25 77
## probabilities: 0.073 0.227 0.700
# Draw the fitted second-half decision tree.
# roundint = FALSE is passed explicitly: the rpart model was fitted without
# model = TRUE, so rpart.plot cannot retrieve the training data and would
# otherwise warn that it "cannot determine roundint and is.binary".
rpart.plot(second_half_result$model, main = "Second Half Decision Tree",
           type = 3, extra = 104, roundint = FALSE)
# Report the accuracy stored in each half's result object, rounded to
# two decimal places. Each value is preceded by a label line.
print("First Half Accuracy:")
## [1] "First Half Accuracy:"
print(round(first_half_result$accuracy, digits = 2))
## [1] 0.62
print("Second Half Accuracy:")
## [1] "Second Half Accuracy:"
print(round(second_half_result$accuracy, digits = 2))
## [1] 0.74
As the results show, 1, 2 and X are the most important features for predicting the match result. We eliminated some highly correlated features during our analysis, and removing extraordinary events also helped us achieve higher accuracy. Additionally, our second-half accuracy (0.74) is noticeably higher than our first-half accuracy (0.62). Possible reasons for this are that the second half is generally more dynamic and predictable, team strategies become more apparent, and the tempo of the match increases. Irregularities in the dataset, and some features becoming more significant in the second half, may also have helped the model make more accurate predictions. Changes in teams' playing style and strategies may also contribute to this difference.